In [83]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
In [84]:
df = pd.read_csv('itineraries_sample_500.csv')
In [85]:
df['day_to_dep'] = pd.to_datetime(df['flightDate']) - pd.to_datetime(df['searchDate'])
df['day_to_dep'].head(50)
Out[85]:
0 2 days 1 9 days 2 5 days 3 1 days 4 4 days 5 5 days 6 8 days 7 6 days 8 3 days 9 12 days 10 8 days 11 11 days 12 4 days 13 8 days 14 11 days 15 2 days 16 7 days 17 5 days 18 7 days 19 10 days 20 5 days 21 2 days 22 6 days 23 11 days 24 4 days 25 4 days 26 1 days 27 7 days 28 3 days 29 1 days 30 4 days 31 8 days 32 4 days 33 1 days 34 1 days 35 7 days 36 8 days 37 6 days 38 3 days 39 3 days 40 4 days 41 10 days 42 10 days 43 1 days 44 1 days 45 1 days 46 10 days 47 1 days 48 7 days 49 1 days Name: day_to_dep, dtype: timedelta64[ns]
In [86]:
# Group by startingAirport, destinationAirport, and flightDate, and count the number of flights
top_flights = df.groupby(['startingAirport', 'destinationAirport', 'flightDate']).size().reset_index(name='flight_count')
# Sort by flight_count in descending order and get the top 10
top_flights = top_flights.sort_values(by='flight_count', ascending=False).head(10)
print(top_flights)
startingAirport destinationAirport flightDate flight_count 32769 LGA ORD 2022-09-13 47 1796 ATL LAX 2022-08-30 47 27642 LAX BOS 2022-08-21 46 32140 LGA LAX 2022-09-21 44 29372 LAX LGA 2022-09-13 44 29367 LAX LGA 2022-09-08 44 27421 LAX ATL 2022-08-12 44 29971 LAX ORD 2022-08-09 43 1806 ATL LAX 2022-09-09 43 30002 LAX ORD 2022-09-09 42
In [87]:
# Group the dataframe by departure airport, arrival airport, and departure date
grouped = df.groupby(['startingAirport', 'destinationAirport'])
# Extract unique flight identifiers
unique_flights = list(grouped.groups.keys()) # List of (start_airport, dest_airport, dep_date) tuples
# Select 10 unique flights
selected_flights = unique_flights[:10] # Modify this if you want a random selection
# List to store DataFrames
flight_price_changes = []
# Iterate through selected flights
for flight in selected_flights:
# start_airport, dest_airport, dep_date = flight
start_airport, dest_airport = flight
# Get the corresponding DataFrame
flight_df = grouped.get_group(flight).copy()
# Sort by days to departure
flight_df = flight_df.sort_values(by='day_to_dep')
# Append to list
flight_price_changes.append(flight_df)
# Print the first few rows of each DataFrame in the list
for i, flight_df in enumerate(flight_price_changes):
print(f"Flight {i+1}:")
print(flight_df.head(), "\n")
Flight 1:
legId searchDate flightDate \
265868 1b24e4d54f7c8d28063d298ed47e23f2 2022-08-04 2022-08-05
372668 ce88de3ce7239c38c6dcfb0dc604ce7a 2022-09-09 2022-09-10
380736 d17b007cc10e1369c8c784a50e74d956 2022-09-09 2022-09-10
212001 e3e40b3b6af11d461414937372dc7366 2022-08-29 2022-08-30
199297 4fbd160789722372f5d3c4c6da7e6cc9 2022-07-25 2022-07-26
startingAirport destinationAirport fareBasisCode travelDuration \
265868 ATL BOS G0AIZNN1 PT6H59M
372668 ATL BOS KA0NX0MC PT2H36M
380736 ATL BOS G0AJZNN1 PT6H52M
212001 ATL BOS UA0NX0MQ PT10H33M
199297 ATL BOS G0AIZNN1 PT4H39M
elapsedDays isBasicEconomy isRefundable ... \
265868 0 False False ...
372668 0 False False ...
380736 0 False False ...
212001 1 False False ...
199297 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
265868 CLT||BOS ATL||CLT
372668 BOS ATL
380736 MIA||BOS ATL||MIA
212001 DCA||BOS ATL||DCA
199297 PHL||BOS ATL||PHL
segmentsAirlineName segmentsAirlineCode \
265868 American Airlines||American Airlines AA||AA
372668 Delta DL
380736 American Airlines||American Airlines AA||AA
212001 Delta||Delta DL||DL
199297 American Airlines||American Airlines AA||AA
segmentsEquipmentDescription segmentsDurationInSeconds \
265868 Canadair Regional Jet 900||Airbus A321 4920||7440
372668 Airbus A321 9360
380736 Airbus A319||Boeing 737-800 6960||11820
212001 Airbus A320||Embraer 175 6600||5880
199297 Canadair Regional Jet 900||Airbus A321 7920||4980
segmentsDistance segmentsCabinCode strata day_to_dep
265868 228||728 coach||coach 2022-08 1 days
372668 947 coach 2022-09 1 days
380736 596||1260 coach||coach 2022-09 1 days
212001 541||406 coach||coach 2022-08 1 days
199297 667||280 coach||coach 2022-07 1 days
[5 rows x 29 columns]
Flight 2:
legId searchDate flightDate \
21058 bbad056d745a752aaa4ac64523e3f2dd 2022-05-24 2022-05-25
17599 b281726ac59fd79c072688ec75bea94f 2022-05-08 2022-05-09
8204 b7b0437b68caaa51a0e35b520e701e9b 2022-04-30 2022-05-01
139881 25c97c43caf3b9371de8489a706babdb 2022-07-01 2022-07-02
48333 2c1038d1a6e77c759fabcd9ba7c76e73 2022-04-30 2022-05-01
startingAirport destinationAirport fareBasisCode travelDuration \
21058 ATL CLT MA0QA0MQ PT1H14M
17599 ATL CLT M0AHZNN1 PT1H21M
8204 ATL CLT M0AHZNN1 PT5H21M
139881 ATL CLT MA0QA0MQ PT1H14M
48333 ATL CLT M0AHZNN1 PT7H41M
elapsedDays isBasicEconomy isRefundable ... \
21058 0 False False ...
17599 0 False False ...
8204 0 False False ...
139881 0 False False ...
48333 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
21058 CLT ATL
17599 CLT ATL
8204 MIA||CLT ATL||MIA
139881 CLT ATL
48333 MIA||CLT ATL||MIA
segmentsAirlineName segmentsAirlineCode \
21058 Delta DL
17599 American Airlines AA
8204 American Airlines||American Airlines AA||AA
139881 Delta DL
48333 American Airlines||American Airlines AA||AA
segmentsEquipmentDescription segmentsDurationInSeconds \
21058 Boeing 717 4440
17599 Canadian Regional Jet 700 4860
8204 Airbus A319||Boeing 737-800 7260||8100
139881 Boeing 717 4440
48333 Airbus A319||Boeing 737-800 7020||7680
segmentsDistance segmentsCabinCode strata day_to_dep
21058 228 coach 2022-05 1 days
17599 228 coach 2022-05 1 days
8204 596||652 coach||coach 2022-05 1 days
139881 228 coach 2022-07 1 days
48333 596||652 coach||coach 2022-05 1 days
[5 rows x 29 columns]
Flight 3:
legId searchDate flightDate \
154162 a2af6905cf96ee472e4e448fe11ae1e6 2022-07-02 2022-07-03
231911 021bdeba2c2f79d077ccbf557e7d7066 2022-08-17 2022-08-18
287060 d2b3d45f38c9f7f6fab83344f9438d85 2022-08-28 2022-08-29
112433 2fdd83601291ddc67878e750053f6f9e 2022-06-11 2022-06-12
7769 587bda3af997225927e42d78cfd50a52 2022-05-29 2022-05-30
startingAirport destinationAirport fareBasisCode travelDuration \
154162 ATL DEN L00YXS2 PT3H13M
231911 ATL DEN VAA0AKEN PT5H55M
287060 ATL DEN S0AIZSN3 PT5H55M
112433 ATL DEN M0AIZNN1 PT7H29M
7769 ATL DEN YNR PT8H30M
elapsedDays isBasicEconomy isRefundable ... \
154162 0 False False ...
231911 0 False False ...
287060 0 False False ...
112433 0 False False ...
7769 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
154162 DEN ATL
231911 ORD||DEN ATL||ORD
287060 ORD||DEN ATL||ORD
112433 DFW||DEN ATL||DFW
7769 LAS||DEN ATL||LAS
segmentsAirlineName segmentsAirlineCode \
154162 Frontier Airlines F9
231911 United||United UA||UA
287060 American Airlines||American Airlines AA||AA
112433 American Airlines||American Airlines AA||AA
7769 Spirit Airlines||Spirit Airlines NK||NK
segmentsEquipmentDescription \
154162 NaN
231911 Airbus A319||Boeing 757-300
287060 Airbus A319||Boeing 737-800
112433 Airbus A321||Airbus A321
7769 AIRBUS INDUSTRIE A321 SHARKLETS||AIRBUS INDUST...
segmentsDurationInSeconds segmentsDistance segmentsCabinCode strata \
154162 11580 1207 coach 2022-07
231911 7920||9900 600||903 coach||coach 2022-08
287060 8100||9360 600||903 coach||coach 2022-08
112433 8400||7320 725||650 coach||coach 2022-06
7769 15240||6840 None||None coach||coach 2022-05
day_to_dep
154162 1 days
231911 1 days
287060 1 days
112433 1 days
7769 1 days
[5 rows x 29 columns]
Flight 4:
legId searchDate flightDate \
403161 d7dfeb7882f724027fa9777197d0428c 2022-09-08 2022-09-09
178867 c55d621c5370be9fcef1a26280e70e64 2022-07-03 2022-07-04
178513 c7fcff121ef6e89e4a77c28a4740b0b9 2022-07-03 2022-07-04
461473 47a5e03021bfdca92cdc24ec7ce0c1a8 2022-10-02 2022-10-03
46538 7a6cbd6bd8242b59efb244054a5a5050 2022-05-06 2022-05-07
startingAirport destinationAirport fareBasisCode travelDuration \
403161 ATL DFW G0AIZNB1 PT6H27M
178867 ATL DFW S0AHZSN1 PT4H32M
178513 ATL DFW SAA0AKES PT4H29M
461473 ATL DFW G0AHZNN1 PT7H57M
46538 ATL DFW YNR PT7H45M
elapsedDays isBasicEconomy isRefundable ... \
403161 0 True False ...
178867 0 False False ...
178513 0 False False ...
461473 0 False False ...
46538 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
403161 DCA||DFW ATL||DCA
178867 CLT||DFW ATL||CLT
178513 IAH||DFW ATL||IAH
461473 DCA||DFW ATL||DCA
46538 MCO||DFW ATL||MCO
segmentsAirlineName segmentsAirlineCode \
403161 American Airlines||American Airlines AA||AA
178867 American Airlines||American Airlines AA||AA
178513 United||United UA||UA
461473 American Airlines||American Airlines AA||AA
46538 Spirit Airlines||Spirit Airlines NK||NK
segmentsEquipmentDescription \
403161 Embraer 175||Airbus A321
178867 Embraer 175||Airbus A321
178513 Embraer 175 (Enhanced Winglets)||Boeing 737-900
461473 Embraer 175||Boeing 737-800
46538 Airbus A319||AIRBUS INDUSTRIE A320 SHARKLETS
segmentsDurationInSeconds segmentsDistance segmentsCabinCode strata \
403161 6420||12300 541||1177 coach||coach 2022-09
178867 4200||9660 228||930 coach||coach 2022-07
178513 8520||4440 691||233 coach||coach 2022-07
461473 6600||12120 541||1177 coach||coach 2022-10
46538 5160||10140 None||None coach||coach 2022-05
day_to_dep
403161 1 days
178867 1 days
178513 1 days
461473 1 days
46538 1 days
[5 rows x 29 columns]
Flight 5:
legId searchDate flightDate \
139468 0431542756415fffa001408493e6f103 2022-06-30 2022-07-01
407003 60b5c78cc58ec4fbb49acc138ca10114 2022-09-09 2022-09-10
328194 f121a620d2ef05ed37db74653f377388 2022-09-23 2022-09-24
203520 3cb4a08bc036a0f91b4d6349f0d6e1a4 2022-07-10 2022-07-11
46826 9e6a4662e6b6b7f22f976832d1c56704 2022-05-12 2022-05-13
startingAirport destinationAirport fareBasisCode travelDuration \
139468 ATL DTW MA0QA0MQ PT2H1M
407003 ATL DTW KA0OX0MC PT3H32M
328194 ATL DTW G0AIZNN1 PT9H42M
203520 ATL DTW S0AIZNN1 PT7H57M
46826 ATL DTW G0AIZNN1 PT5H16M
elapsedDays isBasicEconomy isRefundable ... \
139468 0 False False ...
407003 0 False False ...
328194 0 False False ...
203520 0 False False ...
46826 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
139468 DTW ATL
407003 CMH||DTW ATL||CMH
328194 ORD||DTW ATL||ORD
203520 DFW||DTW ATL||DFW
46826 LGA||DTW ATL||LGA
segmentsAirlineName segmentsAirlineCode \
139468 Delta DL
407003 Delta||Delta DL||DL
328194 American Airlines||American Airlines AA||AA
203520 American Airlines||American Airlines AA||AA
46826 American Airlines||American Airlines AA||AA
segmentsEquipmentDescription segmentsDurationInSeconds \
139468 Airbus A321 7260
407003 Boeing 737-900||Canadair Regional Jet 900 5340||3900
328194 Airbus A319||Canadian Regional Jet 700 7860||4920
203520 Airbus A321||Boeing 737-800 8400||9600
46826 Embraer 175||Embraer 175 8460||7560
segmentsDistance segmentsCabinCode strata day_to_dep
139468 604 coach 2022-07 1 days
407003 449||161 coach||coach 2022-09 1 days
328194 600||240 coach||coach 2022-09 1 days
203520 725||995 coach||coach 2022-07 1 days
46826 762||485 coach||coach 2022-05 1 days
[5 rows x 29 columns]
Flight 6:
legId searchDate flightDate \
141962 c12c480775783072d67a09986fc79e34 2022-07-22 2022-07-23
19331 8375c54c22136f80b0294f2ef2992fd6 2022-05-08 2022-05-09
248672 63bf60a36024b2d994918fdb2b42dd3a 2022-08-10 2022-08-11
115064 2a9211ad4bd797e833569505a09a073e 2022-06-18 2022-06-19
64666 ad844cd943cc690319efecfaa951a45a 2022-06-09 2022-06-10
startingAirport destinationAirport fareBasisCode travelDuration \
141962 ATL EWR UA0NX0BQ PT2H12M
19331 ATL EWR WAA0OHEN PT2H12M
248672 ATL EWR S0AZZNN1 PT8H33M
115064 ATL EWR N0AJZSN3 PT4H14M
64666 ATL EWR KA0OX0MQ PT2H18M
elapsedDays isBasicEconomy isRefundable ... \
141962 1 True False ...
19331 0 False False ...
248672 0 False False ...
115064 0 False False ...
64666 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
141962 EWR ATL
19331 EWR ATL
248672 DFW||EWR ATL||DFW
115064 CLT||EWR ATL||CLT
64666 EWR ATL
segmentsAirlineName segmentsAirlineCode \
141962 Delta DL
19331 United UA
248672 American Airlines||American Airlines AA||AA
115064 American Airlines||American Airlines AA||AA
64666 Delta DL
segmentsEquipmentDescription segmentsDurationInSeconds \
141962 Airbus A320 7920
19331 Airbus A320 7920
248672 Airbus A321||Boeing 737-800 8280||12420
115064 Airbus A319||Boeing 737-800 4800||6240
64666 Boeing 717 8280
segmentsDistance segmentsCabinCode strata day_to_dep
141962 762 coach 2022-07 1 days
19331 762 coach 2022-05 1 days
248672 725||1380 coach||coach 2022-08 1 days
115064 228||545 coach||coach 2022-06 1 days
64666 762 coach 2022-06 1 days
[5 rows x 29 columns]
Flight 7:
legId searchDate flightDate \
681 9161351438c9606bbed65fcaa6866ff8 2022-04-22 2022-04-23
44517 434876a5ef284c89ef40a53724cf99a6 2022-05-30 2022-05-31
45957 2d79f51258ea472e0dfa2bca4fddf8c4 2022-05-18 2022-05-19
47089 c001fa662635f03b73889e3a9bd94b30 2022-05-08 2022-05-09
47193 d985f367f3de0b48aca9c2c564aa890a 2022-05-07 2022-05-08
startingAirport destinationAirport fareBasisCode travelDuration \
681 ATL IAD L0AIZNN1 PT5H52M
44517 ATL IAD UAA0OKEN PT7H22M
45957 ATL IAD V0AHZNN1 PT6H17M
47089 ATL IAD QAA0OKEN PT6H41M
47193 ATL IAD V0AHZNN1 PT3H50M
elapsedDays isBasicEconomy isRefundable ... \
681 0 False False ...
44517 0 False False ...
45957 0 False False ...
47089 1 False False ...
47193 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
681 CLT||IAD ATL||CLT
44517 ORD||IAD ATL||ORD
45957 CLT||IAD ATL||CLT
47089 ORD||IAD ATL||ORD
47193 CLT||IAD ATL||CLT
segmentsAirlineName segmentsAirlineCode \
681 American Airlines||American Airlines AA||AA
44517 United||United UA||UA
45957 American Airlines||American Airlines AA||AA
47089 United||United UA||UA
47193 American Airlines||American Airlines AA||AA
segmentsEquipmentDescription \
681 Canadair Regional Jet 900||Canadair Regional J...
44517 Embraer 175 (Enhanced Winglets)||Boeing 757-200
45957 Airbus A320||Canadair Regional Jet 900
47089 Airbus A319||Boeing 757-200
47193 Airbus A319||Canadair Regional Jet 900
segmentsDurationInSeconds segmentsDistance segmentsCabinCode strata \
681 5160||5040 228||327 coach||coach 2022-04
44517 7980||6720 600||594 coach||coach 2022-05
45957 4680||5640 228||327 coach||coach 2022-05
47089 7200||6720 600||594 coach||coach 2022-05
47193 4860||5640 228||327 coach||coach 2022-05
day_to_dep
681 1 days
44517 1 days
45957 1 days
47089 1 days
47193 1 days
[5 rows x 29 columns]
Flight 8:
legId searchDate flightDate \
9547 69c10bdbf516541986f832730efd6ef6 2022-05-07 2022-05-08
48708 0d52f1e50e8f25642020855fbedf47d1 2022-05-23 2022-05-24
88415 f1fe1a1d13771c0c37a49db003caf163 2022-05-31 2022-06-01
45228 4e2c07eff593c3a2fa50deb4979f81c7 2022-05-14 2022-05-15
88374 19c64e61f5767d77dff4c8fcb32853b8 2022-06-14 2022-06-15
startingAirport destinationAirport fareBasisCode travelDuration \
9547 ATL JFK V0AHZNN1 PT5H50M
48708 ATL JFK HA0QA0MQ PT6H11M
88415 ATL JFK V0AHZNN1 PT7H30M
45228 ATL JFK YH0JUEY5 PT2H23M
88374 ATL JFK BA0OA0MQ PT2H33M
elapsedDays isBasicEconomy isRefundable ... \
9547 0 False False ...
48708 0 False False ...
88415 0 False False ...
45228 0 False False ...
88374 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
9547 CLT||JFK ATL||CLT
48708 IAD||JFK ATL||IAD
88415 ORD||JFK ATL||ORD
45228 JFK ATL
88374 JFK ATL
segmentsAirlineName segmentsAirlineCode \
9547 American Airlines||American Airlines AA||AA
48708 Delta||Delta DL||DL
88415 American Airlines||American Airlines AA||AA
45228 American Airlines AA
88374 Delta DL
segmentsEquipmentDescription segmentsDurationInSeconds \
9547 Canadair Regional Jet 900||Airbus A319 4860||7200
48708 Boeing 717||Embraer 175 5880||5280
88415 Airbus A319||Boeing 737-800 7620||8040
45228 Embraer 190 8580
88374 Boeing 737-900 9180
segmentsDistance segmentsCabinCode strata day_to_dep
9547 228||545 coach||coach 2022-05 1 days
48708 None||None coach||coach 2022-05 1 days
88415 600||720 coach||coach 2022-06 1 days
45228 762 coach 2022-05 1 days
88374 762 coach 2022-06 1 days
[5 rows x 29 columns]
Flight 9:
legId searchDate flightDate \
149257 d3cd785a6803a59785d4258a2eb52641 2022-07-06 2022-07-07
193393 388f147409b7bf14e3104a2643efcec9 2022-07-06 2022-07-07
239436 634e48e10ef16e7ac43127ba44d18a35 2022-08-06 2022-08-07
137123 63ebad8c27ea8190bcff91f58d318da5 2022-07-06 2022-07-07
134825 84668e140c31c0d757966761fd776890 2022-07-28 2022-07-29
startingAirport destinationAirport fareBasisCode travelDuration \
149257 ATL LAX V0AIZNN1 PT8H52M
193393 ATL LAX V0AIZNN1 PT6H39M
239436 ATL LAX H0XSNR PT6H20M
137123 ATL LAX HA0QA0MQ PT4H40M
134825 ATL LAX V0AIZNN1 PT7H36M
elapsedDays isBasicEconomy isRefundable ... \
149257 0 False False ...
193393 0 False False ...
239436 0 False False ...
137123 0 False False ...
134825 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
149257 CLT||LAX ATL||CLT
193393 DFW||LAX ATL||DFW
239436 LAS||LAX ATL||LAS
137123 LAX ATL
134825 DFW||LAX ATL||DFW
segmentsAirlineName segmentsAirlineCode \
149257 American Airlines||American Airlines AA||AA
193393 American Airlines||American Airlines AA||AA
239436 Spirit Airlines||Spirit Airlines NK||NK
137123 Delta DL
134825 American Airlines||American Airlines AA||AA
segmentsEquipmentDescription segmentsDurationInSeconds \
149257 Canadair Regional Jet 900||Airbus A321 4920||17820
193393 Airbus A321||Airbus A321 8760||11400
239436 AIRBUS INDUSTRIE A321 SHARKLETS|| 15300||4800
137123 Airbus A321 16800
134825 Airbus A321||Airbus A321 8760||11400
segmentsDistance segmentsCabinCode strata day_to_dep
149257 None||None coach||coach 2022-07 1 days
193393 None||None coach||coach 2022-07 1 days
239436 None||None coach||coach 2022-08 1 days
137123 NaN coach 2022-07 1 days
134825 725||1238 coach||coach 2022-07 1 days
[5 rows x 29 columns]
Flight 10:
legId searchDate flightDate \
145114 24c4914eb17e1e93130a8a9bd84cffaa 2022-07-18 2022-07-19
288055 09cec7b8ede6dd4fae26e54ae6067e4b 2022-08-07 2022-08-08
271681 dd7dfe37aaaa4d258ae828eb342f9f98 2022-08-19 2022-08-20
89712 0074c155dcbb11c36369bd2e3afc1967 2022-06-19 2022-06-20
291075 6d0a455008e76d38aa0bc7ac51d111a6 2022-08-27 2022-08-28
startingAirport destinationAirport fareBasisCode travelDuration \
145114 ATL LGA UA0NX0MC PT9H35M
288055 ATL LGA G0AIZNN1 PT6H34M
271681 ATL LGA V0AHZNN1 PT9H2M
89712 ATL LGA G0AHZNN1 PT2H26M
291075 ATL LGA QAA0OKEN PT3H43M
elapsedDays isBasicEconomy isRefundable ... \
145114 1 False False ...
288055 0 False False ...
271681 0 False False ...
89712 0 False False ...
291075 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
145114 BNA||LGA ATL||BNA
288055 DFW||LGA ATL||DFW
271681 DFW||LGA ATL||DFW
89712 LGA ATL
291075 IAD||LGA ATL||IAD
segmentsAirlineName segmentsAirlineCode \
145114 Delta||Delta DL||DL
288055 American Airlines||American Airlines AA||AA
271681 American Airlines||American Airlines AA||AA
89712 American Airlines AA
291075 United||United UA||UA
segmentsEquipmentDescription \
145114 Boeing 737-900||Canadair Regional Jet 900
288055 Airbus A321||Boeing 737-800
271681 Airbus A321||Boeing 737-800
89712 Embraer 170
291075 Embraer 175 (Enhanced Winglets)||Embraer 175 (...
segmentsDurationInSeconds segmentsDistance segmentsCabinCode strata \
145114 3900||8460 215||761 coach||coach 2022-07
288055 8220||12600 725||1380 coach||coach 2022-08
271681 8460||12360 725||1380 coach||coach 2022-08
89712 8760 762 coach 2022-06
291075 5940||5040 541||221 coach||coach 2022-08
day_to_dep
145114 1 days
288055 1 days
271681 1 days
89712 1 days
291075 1 days
[5 rows x 29 columns]
legId searchDate flightDate \
145114 24c4914eb17e1e93130a8a9bd84cffaa 2022-07-18 2022-07-19
288055 09cec7b8ede6dd4fae26e54ae6067e4b 2022-08-07 2022-08-08
271681 dd7dfe37aaaa4d258ae828eb342f9f98 2022-08-19 2022-08-20
89712 0074c155dcbb11c36369bd2e3afc1967 2022-06-19 2022-06-20
291075 6d0a455008e76d38aa0bc7ac51d111a6 2022-08-27 2022-08-28
startingAirport destinationAirport fareBasisCode travelDuration \
145114 ATL LGA UA0NX0MC PT9H35M
288055 ATL LGA G0AIZNN1 PT6H34M
271681 ATL LGA V0AHZNN1 PT9H2M
89712 ATL LGA G0AHZNN1 PT2H26M
291075 ATL LGA QAA0OKEN PT3H43M
elapsedDays isBasicEconomy isRefundable ... \
145114 1 False False ...
288055 0 False False ...
271681 0 False False ...
89712 0 False False ...
291075 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
145114 BNA||LGA ATL||BNA
288055 DFW||LGA ATL||DFW
271681 DFW||LGA ATL||DFW
89712 LGA ATL
291075 IAD||LGA ATL||IAD
segmentsAirlineName segmentsAirlineCode \
145114 Delta||Delta DL||DL
288055 American Airlines||American Airlines AA||AA
271681 American Airlines||American Airlines AA||AA
89712 American Airlines AA
291075 United||United UA||UA
segmentsEquipmentDescription \
145114 Boeing 737-900||Canadair Regional Jet 900
288055 Airbus A321||Boeing 737-800
271681 Airbus A321||Boeing 737-800
89712 Embraer 170
291075 Embraer 175 (Enhanced Winglets)||Embraer 175 (...
segmentsDurationInSeconds segmentsDistance segmentsCabinCode strata \
145114 3900||8460 215||761 coach||coach 2022-07
288055 8220||12600 725||1380 coach||coach 2022-08
271681 8460||12360 725||1380 coach||coach 2022-08
89712 8760 762 coach 2022-06
291075 5940||5040 541||221 coach||coach 2022-08
day_to_dep
145114 1 days
288055 1 days
271681 1 days
89712 1 days
291075 1 days
[5 rows x 29 columns]
In [88]:
print(flight_price_changes)
size = len(flight_price_changes)
# Plot the price changes for each flight
fig, axs = plt.subplots(size, 1, figsize=(10, 10*size))
for i, flight_df in enumerate(flight_price_changes):
axs[i].plot(flight_df['day_to_dep'], flight_df['totalFare'])
axs[i].set_title(f"Flight {i+1}")
axs[i].set_xlabel('Days to departure')
axs[i].set_ylabel('Price')
axs[i].grid()
plt.tight_layout()
plt.show()
[ legId searchDate flightDate \
265868 1b24e4d54f7c8d28063d298ed47e23f2 2022-08-04 2022-08-05
372668 ce88de3ce7239c38c6dcfb0dc604ce7a 2022-09-09 2022-09-10
380736 d17b007cc10e1369c8c784a50e74d956 2022-09-09 2022-09-10
212001 e3e40b3b6af11d461414937372dc7366 2022-08-29 2022-08-30
199297 4fbd160789722372f5d3c4c6da7e6cc9 2022-07-25 2022-07-26
... ... ... ...
134716 05826df8e1a76da9d53924bfe6f7d1bc 2022-05-09 2022-07-08
315495 9f464b3cb2e1d5e178790c959053091d 2022-07-19 2022-09-17
396178 d37f66af664108c7d51bcab3c081d798 2022-07-27 2022-09-25
126977 280db079ba41fd6252bab00bdd103818 2022-05-21 2022-07-20
220178 69b78fdc48060a0cb7b75080d07e64a0 2022-06-10 2022-08-09
startingAirport destinationAirport fareBasisCode travelDuration \
265868 ATL BOS G0AIZNN1 PT6H59M
372668 ATL BOS KA0NX0MC PT2H36M
380736 ATL BOS G0AJZNN1 PT6H52M
212001 ATL BOS UA0NX0MQ PT10H33M
199297 ATL BOS G0AIZNN1 PT4H39M
... ... ... ... ...
134716 ATL BOS ZI4AUEL1 PT10H36M
315495 ATL BOS KAVTA5MC PT7H
396178 ATL BOS KAVOA0MC PT6H45M
126977 ATL BOS KAVOA0MQ PT2H37M
220178 ATL BOS VAA0AKEN PT4H43M
elapsedDays isBasicEconomy isRefundable ... \
265868 0 False False ...
372668 0 False False ...
380736 0 False False ...
212001 1 False False ...
199297 0 False False ...
... ... ... ... ...
134716 0 False False ...
315495 0 False False ...
396178 0 False False ...
126977 0 False False ...
220178 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
265868 CLT||BOS ATL||CLT
372668 BOS ATL
380736 MIA||BOS ATL||MIA
212001 DCA||BOS ATL||DCA
199297 PHL||BOS ATL||PHL
... ... ...
134716 JFK||BOS ATL||JFK
315495 MCI||BOS ATL||MCI
396178 LGA||BOS ATL||LGA
126977 BOS ATL
220178 EWR||BOS ATL||EWR
segmentsAirlineName segmentsAirlineCode \
265868 American Airlines||American Airlines AA||AA
372668 Delta DL
380736 American Airlines||American Airlines AA||AA
212001 Delta||Delta DL||DL
199297 American Airlines||American Airlines AA||AA
... ... ...
134716 JetBlue Airways||JetBlue Airways B6||B6
315495 Delta||Delta DL||DL
396178 Delta||Delta DL||DL
126977 Delta DL
220178 United||United UA||UA
segmentsEquipmentDescription segmentsDurationInSeconds \
265868 Canadair Regional Jet 900||Airbus A321 4920||7440
372668 Airbus A321 9360
380736 Airbus A319||Boeing 737-800 6960||11820
212001 Airbus A320||Embraer 175 6600||5880
199297 Canadair Regional Jet 900||Airbus A321 7920||4980
... ... ...
134716 Airbus A320||Airbus A320 8520||4980
315495 Boeing 737-900||Airbus A220-100 7260||10740
396178 Airbus A320||Airbus A220-100 8700||4560
126977 Airbus A321 9420
220178 Boeing 737-800||Boeing 737-800 9180||5400
segmentsDistance segmentsCabinCode strata day_to_dep
265868 228||728 coach||coach 2022-08 1 days
372668 947 coach 2022-09 1 days
380736 596||1260 coach||coach 2022-09 1 days
212001 541||406 coach||coach 2022-08 1 days
199297 667||280 coach||coach 2022-07 1 days
... ... ... ... ...
134716 762||185 coach||coach 2022-07 60 days
315495 693||1254 coach||coach 2022-09 60 days
396178 762||185 coach||coach 2022-09 60 days
126977 947 coach 2022-07 60 days
220178 762||185 coach||coach 2022-08 60 days
[2996 rows x 29 columns], legId searchDate flightDate \
21058 bbad056d745a752aaa4ac64523e3f2dd 2022-05-24 2022-05-25
17599 b281726ac59fd79c072688ec75bea94f 2022-05-08 2022-05-09
8204 b7b0437b68caaa51a0e35b520e701e9b 2022-04-30 2022-05-01
139881 25c97c43caf3b9371de8489a706babdb 2022-07-01 2022-07-02
48333 2c1038d1a6e77c759fabcd9ba7c76e73 2022-04-30 2022-05-01
... ... ... ...
232385 2586872abf05af73cdbb6b0c69f55646 2022-06-15 2022-08-14
360684 3e039c8074fd35f1be237dbd2a1ccfb6 2022-07-18 2022-09-16
114846 3528caca67637e20a4921733e8708a27 2022-04-22 2022-06-21
98741 33e33ef92d492b9e87f707d31ef447b5 2022-04-29 2022-06-28
345994 c465da63889caeff395d8cfaaba8123e 2022-07-14 2022-09-12
startingAirport destinationAirport fareBasisCode travelDuration \
21058 ATL CLT MA0QA0MQ PT1H14M
17599 ATL CLT M0AHZNN1 PT1H21M
8204 ATL CLT M0AHZNN1 PT5H21M
139881 ATL CLT MA0QA0MQ PT1H14M
48333 ATL CLT M0AHZNN1 PT7H41M
... ... ... ... ...
232385 ATL CLT NVAHZNN1 PT1H19M
360684 ATL CLT LAUOA0MQ PT1H12M
114846 ATL CLT UAVOA0MQ PT1H14M
98741 ATL CLT VUAHZNN1 PT1H10M
345994 ATL CLT XAVQA0MQ PT1H16M
elapsedDays isBasicEconomy isRefundable ... \
21058 0 False False ...
17599 0 False False ...
8204 0 False False ...
139881 0 False False ...
48333 0 False False ...
... ... ... ... ...
232385 0 False False ...
360684 0 False False ...
114846 0 False False ...
98741 0 False False ...
345994 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
21058 CLT ATL
17599 CLT ATL
8204 MIA||CLT ATL||MIA
139881 CLT ATL
48333 MIA||CLT ATL||MIA
... ... ...
232385 CLT ATL
360684 CLT ATL
114846 CLT ATL
98741 CLT ATL
345994 CLT ATL
segmentsAirlineName segmentsAirlineCode \
21058 Delta DL
17599 American Airlines AA
8204 American Airlines||American Airlines AA||AA
139881 Delta DL
48333 American Airlines||American Airlines AA||AA
... ... ...
232385 American Airlines AA
360684 Delta DL
114846 Delta DL
98741 American Airlines AA
345994 Delta DL
segmentsEquipmentDescription segmentsDurationInSeconds \
21058 Boeing 717 4440
17599 Canadian Regional Jet 700 4860
8204 Airbus A319||Boeing 737-800 7260||8100
139881 Boeing 717 4440
48333 Airbus A319||Boeing 737-800 7020||7680
... ... ...
232385 Boeing 737-800 4740
360684 Boeing 717 4320
114846 Boeing 717 4440
98741 Embraer 175 4200
345994 Boeing 717 4560
segmentsDistance segmentsCabinCode strata day_to_dep
21058 228 coach 2022-05 1 days
17599 228 coach 2022-05 1 days
8204 596||652 coach||coach 2022-05 1 days
139881 228 coach 2022-07 1 days
48333 596||652 coach||coach 2022-05 1 days
... ... ... ... ...
232385 228 coach 2022-08 60 days
360684 228 coach 2022-09 60 days
114846 228 coach 2022-06 60 days
98741 228 coach 2022-06 60 days
345994 228 coach 2022-09 60 days
[1418 rows x 29 columns], legId searchDate flightDate \
154162 a2af6905cf96ee472e4e448fe11ae1e6 2022-07-02 2022-07-03
231911 021bdeba2c2f79d077ccbf557e7d7066 2022-08-17 2022-08-18
287060 d2b3d45f38c9f7f6fab83344f9438d85 2022-08-28 2022-08-29
112433 2fdd83601291ddc67878e750053f6f9e 2022-06-11 2022-06-12
7769 587bda3af997225927e42d78cfd50a52 2022-05-29 2022-05-30
... ... ... ...
226939 7aafff4a778ea705948ff13b8ab76103 2022-07-02 2022-08-31
183687 e7374b34ad3d7a330d2cc731b8cc43ef 2022-05-09 2022-07-08
365014 c577176d16bef1c9a165b68d228f38c1 2022-07-18 2022-09-16
196968 b2f0df07d392bb5e048edf6001424907 2022-05-28 2022-07-27
223748 90d8ad6cab967936c724861a569a1ce3 2022-06-10 2022-08-09
startingAirport destinationAirport fareBasisCode travelDuration \
154162 ATL DEN L00YXS2 PT3H13M
231911 ATL DEN VAA0AKEN PT5H55M
287060 ATL DEN S0AIZSN3 PT5H55M
112433 ATL DEN M0AIZNN1 PT7H29M
7769 ATL DEN YNR PT8H30M
... ... ... ... ...
226939 ATL DEN U21ZXS5 PT3H20M
183687 ATL DEN HA0OA0MQ PT3H8M
365014 ATL DEN QAA0OKEN PT3H25M
196968 ATL DEN VA3NR PT10H33M
223748 ATL DEN UA21NR PT7H40M
elapsedDays isBasicEconomy isRefundable ... \
154162 0 False False ...
231911 0 False False ...
287060 0 False False ...
112433 0 False False ...
7769 0 False False ...
... ... ... ... ...
226939 0 False False ...
183687 0 False False ...
365014 0 False False ...
196968 0 False False ...
223748 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
154162 DEN ATL
231911 ORD||DEN ATL||ORD
287060 ORD||DEN ATL||ORD
112433 DFW||DEN ATL||DFW
7769 LAS||DEN ATL||LAS
... ... ...
226939 DEN ATL
183687 DEN ATL
365014 DEN ATL
196968 LAS||DEN ATL||LAS
223748 LAS||DEN ATL||LAS
segmentsAirlineName segmentsAirlineCode \
154162 Frontier Airlines F9
231911 United||United UA||UA
287060 American Airlines||American Airlines AA||AA
112433 American Airlines||American Airlines AA||AA
7769 Spirit Airlines||Spirit Airlines NK||NK
... ... ...
226939 Frontier Airlines F9
183687 Delta DL
365014 United UA
196968 Spirit Airlines||Spirit Airlines NK||NK
223748 Spirit Airlines||Spirit Airlines NK||NK
segmentsEquipmentDescription \
154162 NaN
231911 Airbus A319||Boeing 757-300
287060 Airbus A319||Boeing 737-800
112433 Airbus A321||Airbus A321
7769 AIRBUS INDUSTRIE A321 SHARKLETS||AIRBUS INDUST...
... ...
226939 NaN
183687 Airbus A321
365014 Embraer 175 (Enhanced Winglets)
196968 Airbus A319||AIRBUS INDUSTRIE A320 SHARKLETS
223748 AIRBUS INDUSTRIE A321 SHARKLETS||AIRBUS INDUST...
segmentsDurationInSeconds segmentsDistance segmentsCabinCode strata \
154162 11580 1207 coach 2022-07
231911 7920||9900 600||903 coach||coach 2022-08
287060 8100||9360 600||903 coach||coach 2022-08
112433 8400||7320 725||650 coach||coach 2022-06
7769 15240||6840 None||None coach||coach 2022-05
... ... ... ... ...
226939 12000 1207 coach 2022-08
183687 11280 1207 coach 2022-07
365014 12300 1207 coach 2022-09
196968 15480||7320 None||None coach||coach 2022-07
223748 15300||7200 None||None coach||coach 2022-08
day_to_dep
154162 1 days
231911 1 days
287060 1 days
112433 1 days
7769 1 days
... ...
226939 60 days
183687 60 days
365014 60 days
196968 60 days
223748 60 days
[1867 rows x 29 columns], legId searchDate flightDate \
403161 d7dfeb7882f724027fa9777197d0428c 2022-09-08 2022-09-09
178867 c55d621c5370be9fcef1a26280e70e64 2022-07-03 2022-07-04
178513 c7fcff121ef6e89e4a77c28a4740b0b9 2022-07-03 2022-07-04
461473 47a5e03021bfdca92cdc24ec7ce0c1a8 2022-10-02 2022-10-03
46538 7a6cbd6bd8242b59efb244054a5a5050 2022-05-06 2022-05-07
... ... ... ...
366896 574e3605cb0f382b7a0b6299e18f97c5 2022-07-04 2022-09-02
361426 3677941e009086fde23a16850d0aeac0 2022-07-03 2022-09-01
72076 3b4ed1dcdd464211950134029a4226a5 2022-04-25 2022-06-24
317645 b7388bad7b8c766d5c90313fe6541ceb 2022-07-29 2022-09-27
382583 f100c0544c1db356a189021e4953b3d6 2022-07-20 2022-09-18
startingAirport destinationAirport fareBasisCode travelDuration \
403161 ATL DFW G0AIZNB1 PT6H27M
178867 ATL DFW S0AHZSN1 PT4H32M
178513 ATL DFW SAA0AKES PT4H29M
461473 ATL DFW G0AHZNN1 PT7H57M
46538 ATL DFW YNR PT7H45M
... ... ... ... ...
366896 ATL DFW SVAIZNN1 PT7H21M
361426 ATL DFW E14GXP5 PT7H34M
72076 ATL DFW UAVNA0BQ PT2H14M
317645 ATL DFW NVAHZNN3 PT2H24M
382583 ATL DFW HA3OA0MQ PT2H19M
elapsedDays isBasicEconomy isRefundable ... \
403161 0 True False ...
178867 0 False False ...
178513 0 False False ...
461473 0 False False ...
46538 0 False False ...
... ... ... ... ...
366896 0 False False ...
361426 0 False False ...
72076 0 True False ...
317645 0 False False ...
382583 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
403161 DCA||DFW ATL||DCA
178867 CLT||DFW ATL||CLT
178513 IAH||DFW ATL||IAH
461473 DCA||DFW ATL||DCA
46538 MCO||DFW ATL||MCO
... ... ...
366896 ORD||DFW ATL||ORD
361426 PHL||DFW ATL||PHL
72076 DFW ATL
317645 DFW ATL
382583 DFW ATL
segmentsAirlineName segmentsAirlineCode \
403161 American Airlines||American Airlines AA||AA
178867 American Airlines||American Airlines AA||AA
178513 United||United UA||UA
461473 American Airlines||American Airlines AA||AA
46538 Spirit Airlines||Spirit Airlines NK||NK
... ... ...
366896 American Airlines||American Airlines AA||AA
361426 Frontier Airlines||Frontier Airlines F9||F9
72076 Delta DL
317645 American Airlines AA
382583 Delta DL
segmentsEquipmentDescription \
403161 Embraer 175||Airbus A321
178867 Embraer 175||Airbus A321
178513 Embraer 175 (Enhanced Winglets)||Boeing 737-900
461473 Embraer 175||Boeing 737-800
46538 Airbus A319||AIRBUS INDUSTRIE A320 SHARKLETS
... ...
366896 Airbus A319||Boeing 737-800
361426 ||
72076 Airbus A321
317645 Boeing 737-800
382583 Airbus A321
segmentsDurationInSeconds segmentsDistance segmentsCabinCode strata \
403161 6420||12300 541||1177 coach||coach 2022-09
178867 4200||9660 228||930 coach||coach 2022-07
178513 8520||4440 691||233 coach||coach 2022-07
461473 6600||12120 541||1177 coach||coach 2022-10
46538 5160||10140 None||None coach||coach 2022-05
... ... ... ... ...
366896 8100||8460 600||799 coach||coach 2022-09
361426 7800||13380 667||1298 coach||coach 2022-09
72076 8040 725 coach 2022-06
317645 8640 725 coach 2022-09
382583 8340 725 coach 2022-09
day_to_dep
403161 1 days
178867 1 days
178513 1 days
461473 1 days
46538 1 days
... ...
366896 60 days
361426 60 days
72076 60 days
317645 60 days
382583 60 days
[1752 rows x 29 columns], legId searchDate flightDate \
139468 0431542756415fffa001408493e6f103 2022-06-30 2022-07-01
407003 60b5c78cc58ec4fbb49acc138ca10114 2022-09-09 2022-09-10
328194 f121a620d2ef05ed37db74653f377388 2022-09-23 2022-09-24
203520 3cb4a08bc036a0f91b4d6349f0d6e1a4 2022-07-10 2022-07-11
46826 9e6a4662e6b6b7f22f976832d1c56704 2022-05-12 2022-05-13
... ... ... ...
289471 e5f199d0e0b2dd903880386645f27f6d 2022-06-18 2022-08-17
162633 e2c869eb9e10f595eae36ceae8eaa5a2 2022-05-07 2022-07-06
163163 8c90a54bc202d3b97ea74e9df0f76e86 2022-05-28 2022-07-27
126188 e1c386d437f8ef1c1fb016488e1ab1f0 2022-05-03 2022-07-02
124776 80a52105b9215c7dd3c049372e7e3ce2 2022-05-26 2022-07-25
startingAirport destinationAirport fareBasisCode travelDuration \
139468 ATL DTW MA0QA0MQ PT2H1M
407003 ATL DTW KA0OX0MC PT3H32M
328194 ATL DTW G0AIZNN1 PT9H42M
203520 ATL DTW S0AIZNN1 PT7H57M
46826 ATL DTW G0AIZNN1 PT5H16M
... ... ... ... ...
289471 ATL DTW KAUOA0MC PT4H36M
162633 ATL DTW GUAHZNN3 PT10H46M
163163 ATL DTW LAUOA0MQ PT1H55M
126188 ATL DTW S3AIZNN1 PT4H38M
124776 ATL DTW WAA4AKEN PT3H49M
elapsedDays isBasicEconomy isRefundable ... \
139468 0 False False ...
407003 0 False False ...
328194 0 False False ...
203520 0 False False ...
46826 0 False False ...
... ... ... ... ...
289471 0 False False ...
162633 0 False False ...
163163 0 False False ...
126188 1 False False ...
124776 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
139468 DTW ATL
407003 CMH||DTW ATL||CMH
328194 ORD||DTW ATL||ORD
203520 DFW||DTW ATL||DFW
46826 LGA||DTW ATL||LGA
... ... ...
289471 BWI||DTW ATL||BWI
162633 DCA||LGA||DTW ATL||DCA||LGA
163163 DTW ATL
126188 CLT||DTW ATL||CLT
124776 IAD||DTW ATL||IAD
segmentsAirlineName \
139468 Delta
407003 Delta||Delta
328194 American Airlines||American Airlines
203520 American Airlines||American Airlines
46826 American Airlines||American Airlines
... ...
289471 Delta||Delta
162633 American Airlines||American Airlines||American...
163163 Delta
126188 American Airlines||American Airlines
124776 United||United
segmentsAirlineCode \
139468 DL
407003 DL||DL
328194 AA||AA
203520 AA||AA
46826 AA||AA
... ...
289471 DL||DL
162633 AA||AA||AA
163163 DL
126188 AA||AA
124776 UA||UA
segmentsEquipmentDescription \
139468 Airbus A321
407003 Boeing 737-900||Canadair Regional Jet 900
328194 Airbus A319||Canadian Regional Jet 700
203520 Airbus A321||Boeing 737-800
46826 Embraer 175||Embraer 175
... ...
289471 Boeing 737-900||Boeing 737-900
162633 Embraer 175||Airbus A319||Embraer 170
163163 Boeing 757
126188 Canadair Regional Jet 900||Boeing 737-800
124776 Embraer 175 (Enhanced Winglets)||Canadair Regi...
segmentsDurationInSeconds segmentsDistance segmentsCabinCode \
139468 7260 604 coach
407003 5340||3900 449||161 coach||coach
328194 7860||4920 600||240 coach||coach
203520 8400||9600 725||995 coach||coach
46826 8460||7560 762||485 coach||coach
... ... ... ...
289471 6720||5640 578||404 coach||coach
162633 6900||4620||7200 541||221||485 coach||coach||coach
163163 6900 604 coach
126188 5100||6780 228||505 coach||coach
124776 5940||5400 541||391 coach||coach
strata day_to_dep
139468 2022-07 1 days
407003 2022-09 1 days
328194 2022-09 1 days
203520 2022-07 1 days
46826 2022-05 1 days
... ... ...
289471 2022-08 60 days
162633 2022-07 60 days
163163 2022-07 60 days
126188 2022-07 60 days
124776 2022-07 60 days
[2310 rows x 29 columns], legId searchDate flightDate \
141962 c12c480775783072d67a09986fc79e34 2022-07-22 2022-07-23
19331 8375c54c22136f80b0294f2ef2992fd6 2022-05-08 2022-05-09
248672 63bf60a36024b2d994918fdb2b42dd3a 2022-08-10 2022-08-11
115064 2a9211ad4bd797e833569505a09a073e 2022-06-18 2022-06-19
64666 ad844cd943cc690319efecfaa951a45a 2022-06-09 2022-06-10
... ... ... ...
413023 243bc970d385d813a017b3795c75d79d 2022-07-05 2022-09-03
196967 76f157df074f54dddea95f3997d3c3e5 2022-05-03 2022-07-02
110701 4346c344f073c35fae8c01485b58aee3 2022-04-25 2022-06-24
412531 078aa845a8d1605c19bbfa2395ae0688 2022-07-29 2022-09-27
326244 fb9708e20b36f74324a120507b51cadc 2022-07-22 2022-09-20
startingAirport destinationAirport fareBasisCode travelDuration \
141962 ATL EWR UA0NX0BQ PT2H12M
19331 ATL EWR WAA0OHEN PT2H12M
248672 ATL EWR S0AZZNN1 PT8H33M
115064 ATL EWR N0AJZSN3 PT4H14M
64666 ATL EWR KA0OX0MQ PT2H18M
... ... ... ... ...
413023 ATL EWR HAA7AHEN PT2H13M
196967 ATL EWR MAA2JKES PT2H13M
110701 ATL EWR HA0QA0MQ PT2H17M
412531 ATL EWR KAA2TWBN PT4H11M
326244 ATL EWR NUAHZNN3 PT5H26M
elapsedDays isBasicEconomy isRefundable ... \
141962 1 True False ...
19331 0 False False ...
248672 0 False False ...
115064 0 False False ...
64666 0 False False ...
... ... ... ... ...
413023 0 False False ...
196967 0 False False ...
110701 0 False False ...
412531 0 True False ...
326244 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
141962 EWR ATL
19331 EWR ATL
248672 DFW||EWR ATL||DFW
115064 CLT||EWR ATL||CLT
64666 EWR ATL
... ... ...
413023 EWR ATL
196967 EWR ATL
110701 EWR ATL
412531 IAD||EWR ATL||IAD
326244 ORD||EWR ATL||ORD
segmentsAirlineName segmentsAirlineCode \
141962 Delta DL
19331 United UA
248672 American Airlines||American Airlines AA||AA
115064 American Airlines||American Airlines AA||AA
64666 Delta DL
... ... ...
413023 United UA
196967 United UA
110701 Delta DL
412531 United||United UA||UA
326244 American Airlines||American Airlines AA||AA
segmentsEquipmentDescription \
141962 Airbus A320
19331 Airbus A320
248672 Airbus A321||Boeing 737-800
115064 Airbus A319||Boeing 737-800
64666 Boeing 717
... ...
413023 Boeing 737-700
196967 Boeing 737-700
110701 Boeing 737-800
412531 Embraer 175 (Enhanced Winglets)||Boeing 737-800
326244 Embraer 175||Boeing 737-800
segmentsDurationInSeconds segmentsDistance segmentsCabinCode strata \
141962 7920 762 coach 2022-07
19331 7920 762 coach 2022-05
248672 8280||12420 725||1380 coach||coach 2022-08
115064 4800||6240 228||545 coach||coach 2022-06
64666 8280 762 coach 2022-06
... ... ... ... ...
413023 7980 NaN coach 2022-09
196967 7980 762 coach 2022-07
110701 8220 762 coach 2022-06
412531 6420||4800 541||221 coach||coach 2022-09
326244 7680||7560 600||720 coach||coach 2022-09
day_to_dep
141962 1 days
19331 1 days
248672 1 days
115064 1 days
64666 1 days
... ...
413023 60 days
196967 60 days
110701 60 days
412531 60 days
326244 60 days
[1857 rows x 29 columns], legId searchDate flightDate \
681 9161351438c9606bbed65fcaa6866ff8 2022-04-22 2022-04-23
44517 434876a5ef284c89ef40a53724cf99a6 2022-05-30 2022-05-31
45957 2d79f51258ea472e0dfa2bca4fddf8c4 2022-05-18 2022-05-19
47089 c001fa662635f03b73889e3a9bd94b30 2022-05-08 2022-05-09
47193 d985f367f3de0b48aca9c2c564aa890a 2022-05-07 2022-05-08
... ... ... ...
327886 cc5f018aa13b0bc5a09838b8c05e1132 2022-07-16 2022-09-14
331822 058331e9324c5093679213c4ec188517 2022-07-09 2022-09-07
347245 e7a02f568d52c5c1719bf12fcb27b72a 2022-07-10 2022-09-08
406141 c85c7fe3f251d1c5cae36e6f7a30266c 2022-07-16 2022-09-14
293086 caeef6a8b4f45f6451e70bcb22ec5c19 2022-06-10 2022-08-09
startingAirport destinationAirport fareBasisCode travelDuration \
681 ATL IAD L0AIZNN1 PT5H52M
44517 ATL IAD UAA0OKEN PT7H22M
45957 ATL IAD V0AHZNN1 PT6H17M
47089 ATL IAD QAA0OKEN PT6H41M
47193 ATL IAD V0AHZNN1 PT3H50M
... ... ... ... ...
327886 ATL IAD TAVTA5BC PT1H40M
331822 ATL IAD QVAHZNN3 PT3H48M
347245 ATL IAD V0AHZNN1 PT8H19M
406141 ATL IAD NVAIKSN1 PT3H34M
293086 ATL IAD XAVTA0BC PT1H45M
elapsedDays isBasicEconomy isRefundable ... \
681 0 False False ...
44517 0 False False ...
45957 0 False False ...
47089 1 False False ...
47193 0 False False ...
... ... ... ... ...
327886 1 True False ...
331822 0 False False ...
347245 0 False False ...
406141 0 False False ...
293086 0 True False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
681 CLT||IAD ATL||CLT
44517 ORD||IAD ATL||ORD
45957 CLT||IAD ATL||CLT
47089 ORD||IAD ATL||ORD
47193 CLT||IAD ATL||CLT
... ... ...
327886 IAD ATL
331822 CLT||IAD ATL||CLT
347245 DFW||IAD ATL||DFW
406141 CLT||IAD ATL||CLT
293086 IAD ATL
segmentsAirlineName segmentsAirlineCode \
681 American Airlines||American Airlines AA||AA
44517 United||United UA||UA
45957 American Airlines||American Airlines AA||AA
47089 United||United UA||UA
47193 American Airlines||American Airlines AA||AA
... ... ...
327886 Delta DL
331822 American Airlines||American Airlines AA||AA
347245 American Airlines||American Airlines AA||AA
406141 American Airlines||American Airlines AA||AA
293086 Delta DL
segmentsEquipmentDescription \
681 Canadair Regional Jet 900||Canadair Regional J...
44517 Embraer 175 (Enhanced Winglets)||Boeing 757-200
45957 Airbus A320||Canadair Regional Jet 900
47089 Airbus A319||Boeing 757-200
47193 Airbus A319||Canadair Regional Jet 900
... ...
327886 Boeing 717
331822 Airbus A320||Airbus A319
347245 Boeing 737-800||Boeing 737-800
406141 Canadair Regional Jet 900||Canadair Regional J...
293086 Boeing 717
segmentsDurationInSeconds segmentsDistance segmentsCabinCode strata \
681 5160||5040 228||327 coach||coach 2022-04
44517 7980||6720 600||594 coach||coach 2022-05
45957 4680||5640 228||327 coach||coach 2022-05
47089 7200||6720 600||594 coach||coach 2022-05
47193 4860||5640 228||327 coach||coach 2022-05
... ... ... ... ...
327886 6000 541 coach 2022-09
331822 4560||4620 228||327 coach||coach 2022-09
347245 8760||10260 725||1177 coach||coach 2022-09
406141 5160||4980 228||327 coach||coach 2022-09
293086 6300 541 coach 2022-08
day_to_dep
681 1 days
44517 1 days
45957 1 days
47089 1 days
47193 1 days
... ...
327886 60 days
331822 60 days
347245 60 days
406141 60 days
293086 60 days
[898 rows x 29 columns], legId searchDate flightDate \
9547 69c10bdbf516541986f832730efd6ef6 2022-05-07 2022-05-08
48708 0d52f1e50e8f25642020855fbedf47d1 2022-05-23 2022-05-24
88415 f1fe1a1d13771c0c37a49db003caf163 2022-05-31 2022-06-01
45228 4e2c07eff593c3a2fa50deb4979f81c7 2022-05-14 2022-05-15
88374 19c64e61f5767d77dff4c8fcb32853b8 2022-06-14 2022-06-15
... ... ... ...
123902 3d0ddd74553120c8c0ac3393b5a3e8de 2022-05-05 2022-07-04
130030 dd7d17519dbd1e22a179288873893be8 2022-05-03 2022-07-02
286072 66ebf8cc77b8aeacb26b22ccb90625a3 2022-06-30 2022-08-29
292111 c8c0fa1c467427abad9123b15e7ec90d 2022-06-04 2022-08-03
231794 0d78e796f916b0ff4f23e20144ee4700 2022-06-26 2022-08-25
startingAirport destinationAirport fareBasisCode travelDuration \
9547 ATL JFK V0AHZNN1 PT5H50M
48708 ATL JFK HA0QA0MQ PT6H11M
88415 ATL JFK V0AHZNN1 PT7H30M
45228 ATL JFK YH0JUEY5 PT2H23M
88374 ATL JFK BA0OA0MQ PT2H33M
... ... ... ... ...
123902 ATL JFK KAUOA0MQ PT7H4M
130030 ATL JFK BH0ABEL1 PT2H22M
286072 ATL JFK PL2ABEL1 PT2H15M
292111 ATL JFK XAVNA0BC PT11H
231794 ATL JFK UAA0OFEN PT12H53M
elapsedDays isBasicEconomy isRefundable ... \
9547 0 False False ...
48708 0 False False ...
88415 0 False False ...
45228 0 False False ...
88374 0 False False ...
... ... ... ... ...
123902 0 False False ...
130030 0 False False ...
286072 0 False False ...
292111 1 True False ...
231794 1 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
9547 CLT||JFK ATL||CLT
48708 IAD||JFK ATL||IAD
88415 ORD||JFK ATL||ORD
45228 JFK ATL
88374 JFK ATL
... ... ...
123902 SAV||JFK ATL||SAV
130030 JFK ATL
286072 JFK ATL
292111 MIA||JFK ATL||MIA
231794 ORD||SYR||JFK ATL||ORD||SYR
segmentsAirlineName segmentsAirlineCode \
9547 American Airlines||American Airlines AA||AA
48708 Delta||Delta DL||DL
88415 American Airlines||American Airlines AA||AA
45228 American Airlines AA
88374 Delta DL
... ... ...
123902 Delta||Delta DL||DL
130030 JetBlue Airways B6
286072 JetBlue Airways B6
292111 Delta||Delta DL||DL
231794 United||United||Delta UA||UA||DL
segmentsEquipmentDescription \
9547 Canadair Regional Jet 900||Airbus A319
48708 Boeing 717||Embraer 175
88415 Airbus A319||Boeing 737-800
45228 Embraer 190
88374 Boeing 737-900
... ...
123902 Boeing 757||Canadair Regional Jet 900
130030 Airbus A320
286072 Airbus A320
292111 Boeing 737-900||Boeing 737-800
231794 Airbus A319||Boeing 737-900||Canadair Regional...
segmentsDurationInSeconds segmentsDistance segmentsCabinCode \
9547 4860||7200 228||545 coach||coach
48708 5880||5280 None||None coach||coach
88415 7620||8040 600||720 coach||coach
45228 8580 762 coach
88374 9180 762 coach
... ... ... ...
123902 3900||8340 215||726 coach||coach
130030 8520 762 coach
286072 8100 762 coach
292111 6660||10740 596||1104 coach||coach
231794 7980||6840||5100 600||602||189 coach||coach||coach
strata day_to_dep
9547 2022-05 1 days
48708 2022-05 1 days
88415 2022-06 1 days
45228 2022-05 1 days
88374 2022-06 1 days
... ... ...
123902 2022-07 60 days
130030 2022-07 60 days
286072 2022-08 60 days
292111 2022-08 60 days
231794 2022-08 60 days
[1836 rows x 29 columns], legId searchDate flightDate \
149257 d3cd785a6803a59785d4258a2eb52641 2022-07-06 2022-07-07
193393 388f147409b7bf14e3104a2643efcec9 2022-07-06 2022-07-07
239436 634e48e10ef16e7ac43127ba44d18a35 2022-08-06 2022-08-07
137123 63ebad8c27ea8190bcff91f58d318da5 2022-07-06 2022-07-07
134825 84668e140c31c0d757966761fd776890 2022-07-28 2022-07-29
... ... ... ...
354113 c05d8c28a342fc8888f0ffba1bcce3a7 2022-07-14 2022-09-12
352341 693b088a715dfa5822073d4287ee580e 2022-07-21 2022-09-19
304214 12da0d6aa846c0153ae611ebe71b6ab1 2022-06-11 2022-08-10
381729 d605f45e3f8f3f990175075d72428548 2022-07-22 2022-09-20
211260 7fac925e1e694d0f5fa329cbf0d765f6 2022-06-16 2022-08-15
startingAirport destinationAirport fareBasisCode travelDuration \
149257 ATL LAX V0AIZNN1 PT8H52M
193393 ATL LAX V0AIZNN1 PT6H39M
239436 ATL LAX H0XSNR PT6H20M
137123 ATL LAX HA0QA0MQ PT4H40M
134825 ATL LAX V0AIZNN1 PT7H36M
... ... ... ... ...
354113 ATL LAX VAA7OKEN PT7H29M
352341 ATL LAX QAA0OQEN PT16H6M
304214 ATL LAX UA7NR PT8H6M
381729 ATL LAX XAVQA0BQ PT13H24M
211260 ATL LAX LUAHZNN1 PT7H45M
elapsedDays isBasicEconomy isRefundable ... \
149257 0 False False ...
193393 0 False False ...
239436 0 False False ...
137123 0 False False ...
134825 0 False False ...
... ... ... ... ...
354113 1 False False ...
352341 0 False False ...
304214 0 False False ...
381729 1 True False ...
211260 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
149257 CLT||LAX ATL||CLT
193393 DFW||LAX ATL||DFW
239436 LAS||LAX ATL||LAS
137123 LAX ATL
134825 DFW||LAX ATL||DFW
... ... ...
354113 SFO||LAX ATL||SFO
352341 IAH||AUS||LAX ATL||IAH||AUS
304214 DTW||LAX ATL||DTW
381729 AUS||LAX ATL||AUS
211260 DFW||LAX ATL||DFW
segmentsAirlineName segmentsAirlineCode \
149257 American Airlines||American Airlines AA||AA
193393 American Airlines||American Airlines AA||AA
239436 Spirit Airlines||Spirit Airlines NK||NK
137123 Delta DL
134825 American Airlines||American Airlines AA||AA
... ... ...
354113 United||United UA||UA
352341 United||United||Alaska Airlines UA||UA||AS
304214 Spirit Airlines||Spirit Airlines NK||NK
381729 Delta||Delta DL||DL
211260 American Airlines||American Airlines AA||AA
segmentsEquipmentDescription segmentsDurationInSeconds \
149257 Canadair Regional Jet 900||Airbus A321 4920||17820
193393 Airbus A321||Airbus A321 8760||11400
239436 AIRBUS INDUSTRIE A321 SHARKLETS|| 15300||4800
137123 Airbus A321 16800
134825 Airbus A321||Airbus A321 8760||11400
... ... ...
354113 Boeing 737-800||Airbus A320 19140||5340
352341 Airbus A320||Boeing 737-900||Boeing 737-800 7740||3420||11760
304214 AIRBUS INDUSTRIE A320 SHARKLETS|| 7260||17280
381729 Airbus A321||Airbus A320 8460||11460
211260 Airbus A321|| 8280||11220
segmentsDistance segmentsCabinCode strata day_to_dep
149257 None||None coach||coach 2022-07 1 days
193393 None||None coach||coach 2022-07 1 days
239436 None||None coach||coach 2022-08 1 days
137123 NaN coach 2022-07 1 days
134825 725||1238 coach||coach 2022-07 1 days
... ... ... ... ...
354113 2135||339 coach||coach 2022-09 60 days
352341 691||148||1236 coach||coach||coach 2022-09 60 days
304214 None||None coach||coach 2022-08 60 days
381729 811||1236 coach||coach 2022-09 60 days
211260 725||1238 coach||coach 2022-08 60 days
[4330 rows x 29 columns], legId searchDate flightDate \
145114 24c4914eb17e1e93130a8a9bd84cffaa 2022-07-18 2022-07-19
288055 09cec7b8ede6dd4fae26e54ae6067e4b 2022-08-07 2022-08-08
271681 dd7dfe37aaaa4d258ae828eb342f9f98 2022-08-19 2022-08-20
89712 0074c155dcbb11c36369bd2e3afc1967 2022-06-19 2022-06-20
291075 6d0a455008e76d38aa0bc7ac51d111a6 2022-08-27 2022-08-28
... ... ... ...
354664 4b19e38bd498f4caaecd24d205377e2d 2022-07-15 2022-09-13
348977 252c87622aae71b3627c91c1eefe8791 2022-07-12 2022-09-10
298022 4e15aea032a06919095edfd08293a490 2022-07-02 2022-08-31
138524 06b18398c5f4aed75f4ff311f5ac2270 2022-05-05 2022-07-04
211052 b0d1107409479361a3db8939c5d2dc44 2022-07-02 2022-08-31
startingAirport destinationAirport fareBasisCode travelDuration \
145114 ATL LGA UA0NX0MC PT9H35M
288055 ATL LGA G0AIZNN1 PT6H34M
271681 ATL LGA V0AHZNN1 PT9H2M
89712 ATL LGA G0AHZNN1 PT2H26M
291075 ATL LGA QAA0OKEN PT3H43M
... ... ... ... ...
354664 ATL LGA KAVTA0MC PT4H40M
348977 ATL LGA QAVOA0MC PT2H16M
298022 ATL LGA KAUOA0MQ PT5H9M
138524 ATL LGA Q0AIZNN1 PT2H20M
211052 ATL LGA KAA4AHBN PT4H36M
elapsedDays isBasicEconomy isRefundable ... \
145114 1 False False ...
288055 0 False False ...
271681 0 False False ...
89712 0 False False ...
291075 0 False False ...
... ... ... ... ...
354664 0 False False ...
348977 0 False False ...
298022 0 False False ...
138524 0 False False ...
211052 0 True False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
145114 BNA||LGA ATL||BNA
288055 DFW||LGA ATL||DFW
271681 DFW||LGA ATL||DFW
89712 LGA ATL
291075 IAD||LGA ATL||IAD
... ... ...
354664 BNA||LGA ATL||BNA
348977 LGA ATL
298022 BNA||LGA ATL||BNA
138524 LGA ATL
211052 IAD||LGA ATL||IAD
segmentsAirlineName segmentsAirlineCode \
145114 Delta||Delta DL||DL
288055 American Airlines||American Airlines AA||AA
271681 American Airlines||American Airlines AA||AA
89712 American Airlines AA
291075 United||United UA||UA
... ... ...
354664 Delta||Delta DL||DL
348977 Delta DL
298022 Delta||Delta DL||DL
138524 JetBlue Airways B6
211052 United||United UA||UA
segmentsEquipmentDescription \
145114 Boeing 737-900||Canadair Regional Jet 900
288055 Airbus A321||Boeing 737-800
271681 Airbus A321||Boeing 737-800
89712 Embraer 170
291075 Embraer 175 (Enhanced Winglets)||Embraer 175 (...
... ...
354664 Boeing 737-900||Canadair Regional Jet 900
348977 Airbus A320
298022 Boeing 737-900||Canadair Regional Jet 900
138524 Embraer 175
211052 Airbus A319||Embraer 175 (Enhanced Winglets)
segmentsDurationInSeconds segmentsDistance segmentsCabinCode strata \
145114 3900||8460 215||761 coach||coach 2022-07
288055 8220||12600 725||1380 coach||coach 2022-08
271681 8460||12360 725||1380 coach||coach 2022-08
89712 8760 762 coach 2022-06
291075 5940||5040 541||221 coach||coach 2022-08
... ... ... ... ...
354664 4200||9000 215||761 coach||coach 2022-09
348977 8160 762 coach 2022-09
298022 3960||8220 215||761 coach||coach 2022-08
138524 8400 762 coach 2022-07
211052 6600||5760 541||221 coach||coach 2022-08
day_to_dep
145114 1 days
288055 1 days
271681 1 days
89712 1 days
291075 1 days
... ...
354664 60 days
348977 60 days
298022 60 days
138524 60 days
211052 60 days
[2926 rows x 29 columns]]
In [89]:
# Perform mean fill for flight 1
flight_price_changes[0]['totalFare'] = flight_price_changes[0]['totalFare'].fillna(flight_price_changes[0]['totalFare'].mean())
# Print the first few rows to verify
print(flight_price_changes[0].head())
legId searchDate flightDate \
265868 1b24e4d54f7c8d28063d298ed47e23f2 2022-08-04 2022-08-05
372668 ce88de3ce7239c38c6dcfb0dc604ce7a 2022-09-09 2022-09-10
380736 d17b007cc10e1369c8c784a50e74d956 2022-09-09 2022-09-10
212001 e3e40b3b6af11d461414937372dc7366 2022-08-29 2022-08-30
199297 4fbd160789722372f5d3c4c6da7e6cc9 2022-07-25 2022-07-26
startingAirport destinationAirport fareBasisCode travelDuration \
265868 ATL BOS G0AIZNN1 PT6H59M
372668 ATL BOS KA0NX0MC PT2H36M
380736 ATL BOS G0AJZNN1 PT6H52M
212001 ATL BOS UA0NX0MQ PT10H33M
199297 ATL BOS G0AIZNN1 PT4H39M
elapsedDays isBasicEconomy isRefundable ... \
265868 0 False False ...
372668 0 False False ...
380736 0 False False ...
212001 1 False False ...
199297 0 False False ...
segmentsArrivalAirportCode segmentsDepartureAirportCode \
265868 CLT||BOS ATL||CLT
372668 BOS ATL
380736 MIA||BOS ATL||MIA
212001 DCA||BOS ATL||DCA
199297 PHL||BOS ATL||PHL
segmentsAirlineName segmentsAirlineCode \
265868 American Airlines||American Airlines AA||AA
372668 Delta DL
380736 American Airlines||American Airlines AA||AA
212001 Delta||Delta DL||DL
199297 American Airlines||American Airlines AA||AA
segmentsEquipmentDescription segmentsDurationInSeconds \
265868 Canadair Regional Jet 900||Airbus A321 4920||7440
372668 Airbus A321 9360
380736 Airbus A319||Boeing 737-800 6960||11820
212001 Airbus A320||Embraer 175 6600||5880
199297 Canadair Regional Jet 900||Airbus A321 7920||4980
segmentsDistance segmentsCabinCode strata day_to_dep
265868 228||728 coach||coach 2022-08 1 days
372668 947 coach 2022-09 1 days
380736 596||1260 coach||coach 2022-09 1 days
212001 541||406 coach||coach 2022-08 1 days
199297 667||280 coach||coach 2022-07 1 days
[5 rows x 29 columns]
In [90]:
plt.plot(flight_price_changes[0]['day_to_dep'], flight_price_changes[0]['totalFare'])
plt.title('Flight 1')
plt.xlabel('Days to departure')
plt.ylabel('Price')
plt.grid()
plt.show()
In [91]:
# Check for missing prices in flight 1
missing_prices = flight_price_changes[0][flight_price_changes[0]['totalFare'].isnull()]
#Print number of missing prices
print("Number of missing prices in flight 1: " + str(len(missing_prices)))
Number of missing prices in flight 1: 0
In [92]:
# Perform mean fill for flight 2 to 10
for i in range(1, 10):
flight_price_changes[i]['totalFare'] = flight_price_changes[i]['totalFare'].fillna(flight_price_changes[i]['totalFare'].mean())
# Check for missing prices in flights 2 to 10
missing_prices = pd.concat([flight_price_changes[i][flight_price_changes[i]['totalFare'].isnull()] for i in range(1, 10)])
#Print number of missing prices
print("Number of missing prices in flights 2 to 10: " + str(len(missing_prices)))
Number of missing prices in flights 2 to 10: 0
In [93]:
fig, axs = plt.subplots(size, 1, figsize=(10, 6*size))
for i, flight_df in enumerate(flight_price_changes):
axs[i].plot(flight_df['day_to_dep'], flight_df['totalFare'], label=f'Flight {i+1}')
axs[i].set_xlabel('Days to departure')
axs[i].set_ylabel('Total Fare')
axs[i].set_title(f'Flight {i+1}')
axs[i].legend()
axs[i].grid()
plt.tight_layout()
plt.show()
In [94]:
#Flight price changes for each flight
for i, flight_df in enumerate(flight_price_changes):
plt.plot(flight_df['day_to_dep'], flight_df['totalFare'], label=f'Flight {i+1}')
plt.title('Flight Price Changes')
plt.xlabel('Days to departure')
plt.ylabel('Price')
plt.legend()
plt.grid()
plt.show()
In [95]:
shapes = [df.shape for df in flight_price_changes]
print(shapes)
[(2996, 29), (1418, 29), (1867, 29), (1752, 29), (2310, 29), (1857, 29), (898, 29), (1836, 29), (4330, 29), (2926, 29)]
In [96]:
# Define the columns to keep
columns_to_keep = ['startingAirport', 'destinationAirport', 'totalFare', 'day_to_dep', 'flightDate']
# Update each DataFrame in the flight_price_changes list
flight_price_changes = [df[columns_to_keep] for df in flight_price_changes]
# Print the first few rows of each updated DataFrame to verify
for i, flight_df in enumerate(flight_price_changes):
print(f"Flight {i+1}:")
print(flight_df.head(), "\n")
Flight 1:
startingAirport destinationAirport totalFare day_to_dep flightDate
265868 ATL BOS 252.10 1 days 2022-08-05
372668 ATL BOS 318.60 1 days 2022-09-10
380736 ATL BOS 277.60 1 days 2022-09-10
212001 ATL BOS 364.19 1 days 2022-08-30
199297 ATL BOS 253.60 1 days 2022-07-26
Flight 2:
startingAirport destinationAirport totalFare day_to_dep flightDate
21058 ATL CLT 408.6 1 days 2022-05-25
17599 ATL CLT 408.6 1 days 2022-05-09
8204 ATL CLT 417.6 1 days 2022-05-01
139881 ATL CLT 453.6 1 days 2022-07-02
48333 ATL CLT 417.6 1 days 2022-05-01
Flight 3:
startingAirport destinationAirport totalFare day_to_dep flightDate
154162 ATL DEN 214.98 1 days 2022-07-03
231911 ATL DEN 305.61 1 days 2022-08-18
287060 ATL DEN 234.60 1 days 2022-08-29
112433 ATL DEN 465.60 1 days 2022-06-12
7769 ATL DEN 311.58 1 days 2022-05-30
Flight 4:
startingAirport destinationAirport totalFare day_to_dep flightDate
403161 ATL DFW 211.60 1 days 2022-09-09
178867 ATL DFW 191.10 1 days 2022-07-04
178513 ATL DFW 192.60 1 days 2022-07-04
461473 ATL DFW 224.60 1 days 2022-10-03
46538 ATL DFW 339.58 1 days 2022-05-07
Flight 5:
startingAirport destinationAirport totalFare day_to_dep flightDate
139468 ATL DTW 558.59 1 days 2022-07-01
407003 ATL DTW 661.60 1 days 2022-09-10
328194 ATL DTW 435.21 1 days 2022-09-24
203520 ATL DTW 211.60 1 days 2022-07-11
46826 ATL DTW 211.60 1 days 2022-05-13
Flight 6:
startingAirport destinationAirport totalFare day_to_dep flightDate
141962 ATL EWR 199.60 1 days 2022-07-23
19331 ATL EWR 198.60 1 days 2022-05-09
248672 ATL EWR 208.60 1 days 2022-08-11
115064 ATL EWR 176.11 1 days 2022-06-19
64666 ATL EWR 298.60 1 days 2022-06-10
Flight 7:
startingAirport destinationAirport totalFare day_to_dep flightDate
681 ATL IAD 266.1 1 days 2022-04-23
44517 ATL IAD 327.6 1 days 2022-05-31
45957 ATL IAD 210.1 1 days 2022-05-19
47089 ATL IAD 237.6 1 days 2022-05-09
47193 ATL IAD 210.1 1 days 2022-05-08
Flight 8:
startingAirport destinationAirport totalFare day_to_dep flightDate
9547 ATL JFK 252.10 1 days 2022-05-08
48708 ATL JFK 701.60 1 days 2022-05-24
88415 ATL JFK 253.60 1 days 2022-06-01
45228 ATL JFK 593.60 1 days 2022-05-15
88374 ATL JFK 638.61 1 days 2022-06-15
Flight 9:
startingAirport destinationAirport totalFare day_to_dep flightDate
149257 ATL LAX 369.10 1 days 2022-07-07
193393 ATL LAX 370.60 1 days 2022-07-07
239436 ATL LAX 462.58 1 days 2022-08-07
137123 ATL LAX 598.60 1 days 2022-07-07
134825 ATL LAX 390.61 1 days 2022-07-29
Flight 10:
startingAirport destinationAirport totalFare day_to_dep flightDate
145114 ATL LGA 366.2 1 days 2022-07-19
288055 ATL LGA 253.6 1 days 2022-08-08
271681 ATL LGA 253.6 1 days 2022-08-20
89712 ATL LGA 244.6 1 days 2022-06-20
291075 ATL LGA 551.6 1 days 2022-08-28
In [97]:
# List to store the 10 DataFrames
route_dataframes = []
for route in top_flights[['startingAirport', 'destinationAirport']].values:
start_airport, dest_airport = route
route_df = df[(df['startingAirport'] == start_airport) & (df['destinationAirport'] == dest_airport)][['day_to_dep', 'totalFare']]
# Sorting by days to departure
route_df = route_df.sort_values(by='day_to_dep')
route_dataframes.append(route_df)
print("First Route DataFrame:")
print(route_dataframes[0].head())
First Route DataFrame:
day_to_dep totalFare
446 1 days 176.6
14447 1 days 148.6
100130 1 days 260.6
13861 1 days 186.6
13811 1 days 195.6
In [98]:
route_dataframes.sort(key=lambda x: x.shape[0], reverse=True)
print("Sorted Route DataFrames:")
print(route_dataframes)
Sorted Route DataFrames: [ day_to_dep totalFare 149257 1 days 369.10 193393 1 days 370.60 239436 1 days 462.58 137123 1 days 598.60 134825 1 days 390.61 ... ... ... 354113 60 days 338.60 352341 60 days 390.70 304214 60 days 288.58 381729 60 days 215.20 211260 60 days 407.60 [4330 rows x 2 columns], day_to_dep totalFare 149257 1 days 369.10 193393 1 days 370.60 239436 1 days 462.58 137123 1 days 598.60 134825 1 days 390.61 ... ... ... 354113 60 days 338.60 352341 60 days 390.70 304214 60 days 288.58 381729 60 days 215.20 211260 60 days 407.60 [4330 rows x 2 columns], day_to_dep totalFare 74652 1 days 395.60 400537 1 days 562.07 96244 1 days 663.10 106251 1 days 462.60 362394 1 days 389.61 ... ... ... 351527 60 days 169.07 324737 60 days 221.62 263174 60 days 442.10 180875 60 days 420.60 381574 60 days 381.61 [4140 rows x 2 columns], day_to_dep totalFare 74652 1 days 395.60 400537 1 days 562.07 96244 1 days 663.10 106251 1 days 462.60 362394 1 days 389.61 ... ... ... 351527 60 days 169.07 324737 60 days 221.62 263174 60 days 442.10 180875 60 days 420.60 381574 60 days 381.61 [4140 rows x 2 columns], day_to_dep totalFare 356 1 days 380.60 86815 1 days 370.60 326268 1 days 292.97 460126 1 days 591.61 154302 1 days 648.60 ... ... ... 370833 60 days 438.70 358076 60 days 498.60 189020 60 days 584.20 53826 60 days 469.60 409723 60 days 298.60 [4079 rows x 2 columns], day_to_dep totalFare 297 1 days 370.60 404285 1 days 389.61 32215 1 days 567.60 86535 1 days 395.60 406195 1 days 389.61 ... ... ... 349428 60 days 328.60 318948 60 days 162.60 217500 60 days 582.60 131346 60 days 527.60 190640 60 days 527.60 [4022 rows x 2 columns], day_to_dep totalFare 58723 1 days 370.60 160505 1 days 411.60 222757 1 days 360.11 294629 1 days 307.60 367325 1 days 378.61 ... ... ... 383227 60 days 399.60 268695 60 days 608.59 329339 60 days 545.60 174772 60 days 526.71 205177 60 days 378.60 [3952 rows x 2 columns], day_to_dep totalFare 215541 1 days 736.61 408462 1 days 626.60 49723 1 days 428.60 289715 1 days 370.60 290860 1 days 370.60 ... ... ... 365023 60 days 147.60 350035 60 days 187.60 281931 60 days 278.60 126019 60 days 384.60 251860 60 days 273.58 [3771 rows x 2 columns], day_to_dep totalFare 215541 1 days 736.61 408462 1 days 626.60 49723 1 days 428.60 289715 1 days 370.60 290860 1 days 370.60 ... ... ... 365023 60 days 147.60 350035 60 days 187.60 281931 60 days 278.60 126019 60 days 384.60 251860 60 days 273.58 [3771 rows x 2 columns], day_to_dep totalFare 446 1 days 176.60 14447 1 days 148.60 100130 1 days 260.60 13861 1 days 186.60 13811 1 days 195.60 ... ... ... 267036 60 days 148.60 287578 60 days 218.60 231638 60 days 103.60 361775 60 days 129.58 339957 60 days 88.60 [3355 rows x 2 columns]]
In [99]:
# Select the first route dataframe
route_df = route_dataframes[0]
# Count unique values in 'day_to_dep'
unique_days_count = route_df['day_to_dep'].value_counts()
# Add route information
route_info = f"Route: {route[0]} to {route[1]}"
# Calculate the average price for each unique day
average_price_per_day = route_df.groupby('day_to_dep')['totalFare'].mean()
# Calculate the average price by route
average_price_by_route = df.groupby(['startingAirport', 'destinationAirport'])['totalFare'].mean()
# Print the unique days count, route information, average price per day, and average price by route
print(route_info)
print("Unique days count:")
print(unique_days_count)
print("\nAverage price per day:")
print(average_price_per_day)
print("\nAverage price by route:")
print(average_price_by_route.loc[(route[0], route[1])])
Route: LAX to ORD Unique days count: day_to_dep 4 days 104 1 days 99 6 days 96 23 days 94 3 days 93 15 days 92 17 days 91 5 days 91 10 days 89 33 days 88 19 days 87 11 days 87 2 days 87 35 days 87 7 days 87 20 days 87 12 days 86 24 days 86 8 days 85 18 days 85 38 days 84 14 days 84 13 days 84 34 days 83 28 days 82 9 days 80 30 days 80 22 days 79 32 days 77 40 days 76 37 days 76 39 days 75 21 days 75 31 days 73 29 days 73 16 days 72 42 days 71 27 days 69 41 days 68 44 days 66 46 days 64 36 days 64 47 days 63 25 days 62 48 days 60 45 days 58 26 days 54 53 days 54 43 days 53 49 days 53 51 days 53 56 days 47 50 days 44 52 days 43 57 days 43 54 days 40 59 days 39 55 days 38 58 days 38 60 days 32 Name: count, dtype: int64 Average price per day: day_to_dep 1 days 454.131818 2 days 476.143103 3 days 465.326882 4 days 448.345577 5 days 437.318462 6 days 414.336979 7 days 402.518161 8 days 382.816588 9 days 390.915625 10 days 403.060112 11 days 396.310460 12 days 405.175581 13 days 401.854881 14 days 357.656190 15 days 349.758478 16 days 354.272639 17 days 355.457802 18 days 333.755882 19 days 367.505287 20 days 356.031954 21 days 350.882400 22 days 360.916582 23 days 375.782340 24 days 350.144651 25 days 326.266613 26 days 359.592963 27 days 346.334638 28 days 340.460488 29 days 338.710411 30 days 355.828875 31 days 357.525205 32 days 370.629091 33 days 328.179886 34 days 332.136386 35 days 331.746207 36 days 346.625781 37 days 335.805395 38 days 333.449048 39 days 327.224533 40 days 332.999737 41 days 311.728529 42 days 340.484648 43 days 318.692453 44 days 337.418636 45 days 346.998793 46 days 328.280625 47 days 331.565714 48 days 330.139167 49 days 354.352453 50 days 316.003636 51 days 304.873774 52 days 318.006512 53 days 341.518704 54 days 357.078750 55 days 294.952105 56 days 379.510213 57 days 396.079535 58 days 358.149474 59 days 379.091795 60 days 362.179062 Name: totalFare, dtype: float64 Average price by route: 335.97048793423494
In [100]:
from statsmodels.tsa.arima.model import ARIMA
# List to store ARIMA models
arima_models = []
# Use the route_df
flight_df = route_df.set_index('day_to_dep')
flight_df = flight_df.sort_index()
In [109]:
# List to store SARIMA models
sarima_models = []
# Iterate over the 10 flight DataFrames
for i, flight_df in enumerate(route_dataframes):
# Ensure that 'totalFare' column exists and has sufficient data
if flight_df['totalFare'].isna().sum() > 0:
print(f"Skipping Flight {i+1} due to missing values in totalFare.")
continue
if len(flight_df) < 10: # Ensure at least 10 data points for fitting
print(f"Skipping Flight {i+1} due to insufficient data.")
continue
# Fitting the ARIMA model using auto_arima
model = auto_arima(flight_df['totalFare'], seasonal=True, m=7, suppress_warnings=True)
sarima_result = model.fit(flight_df['totalFare'])
# Store the model
sarima_models.append(sarima_result)
# Summary output
print(f"SARIMA Model for Flight {i+1}:")
print(sarima_result.summary())
print("\n")
SARIMA Model for Flight 1:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4330
Model: SARIMAX(5, 1, 0) Log Likelihood -27825.546
Date: Wed, 19 Feb 2025 AIC 55663.092
Time: 09:10:03 BIC 55701.330
Sample: 0 HQIC 55676.592
- 4330
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8576 0.015 -58.525 0.000 -0.886 -0.829
ar.L2 -0.6949 0.019 -37.171 0.000 -0.732 -0.658
ar.L3 -0.5108 0.019 -26.710 0.000 -0.548 -0.473
ar.L4 -0.3639 0.017 -20.831 0.000 -0.398 -0.330
ar.L5 -0.1544 0.015 -10.542 0.000 -0.183 -0.126
sigma2 2.242e+04 454.214 49.360 0.000 2.15e+04 2.33e+04
===================================================================================
Ljung-Box (L1) (Q): 1.77 Jarque-Bera (JB): 102.69
Prob(Q): 0.18 Prob(JB): 0.00
Heteroskedasticity (H): 1.11 Skew: 0.35
Prob(H) (two-sided): 0.04 Kurtosis: 3.28
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
SARIMA Model for Flight 2:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4330
Model: SARIMAX(5, 1, 0) Log Likelihood -27825.546
Date: Wed, 19 Feb 2025 AIC 55663.092
Time: 09:10:22 BIC 55701.330
Sample: 0 HQIC 55676.592
- 4330
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8576 0.015 -58.525 0.000 -0.886 -0.829
ar.L2 -0.6949 0.019 -37.171 0.000 -0.732 -0.658
ar.L3 -0.5108 0.019 -26.710 0.000 -0.548 -0.473
ar.L4 -0.3639 0.017 -20.831 0.000 -0.398 -0.330
ar.L5 -0.1544 0.015 -10.542 0.000 -0.183 -0.126
sigma2 2.242e+04 454.214 49.360 0.000 2.15e+04 2.33e+04
===================================================================================
Ljung-Box (L1) (Q): 1.77 Jarque-Bera (JB): 102.69
Prob(Q): 0.18 Prob(JB): 0.00
Heteroskedasticity (H): 1.11 Skew: 0.35
Prob(H) (two-sided): 0.04 Kurtosis: 3.28
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
SARIMA Model for Flight 3:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4140
Model: SARIMAX Log Likelihood -27082.345
Date: Wed, 19 Feb 2025 AIC 54168.690
Time: 09:10:28 BIC 54181.347
Sample: 0 HQIC 54173.168
- 4140
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 402.8568 2.977 135.313 0.000 397.022 408.692
sigma2 2.815e+04 171.038 164.596 0.000 2.78e+04 2.85e+04
===================================================================================
Ljung-Box (L1) (Q): 0.96 Jarque-Bera (JB): 183447.34
Prob(Q): 0.33 Prob(JB): 0.00
Heteroskedasticity (H): 1.23 Skew: 2.82
Prob(H) (two-sided): 0.00 Kurtosis: 35.12
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
SARIMA Model for Flight 4:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4140
Model: SARIMAX Log Likelihood -27082.345
Date: Wed, 19 Feb 2025 AIC 54168.690
Time: 09:10:34 BIC 54181.347
Sample: 0 HQIC 54173.168
- 4140
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 402.8568 2.977 135.313 0.000 397.022 408.692
sigma2 2.815e+04 171.038 164.596 0.000 2.78e+04 2.85e+04
===================================================================================
Ljung-Box (L1) (Q): 0.96 Jarque-Bera (JB): 183447.34
Prob(Q): 0.33 Prob(JB): 0.00
Heteroskedasticity (H): 1.23 Skew: 2.82
Prob(H) (two-sided): 0.00 Kurtosis: 35.12
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
SARIMA Model for Flight 5:
SARIMAX Results
===========================================================================================
Dep. Variable: y No. Observations: 4079
Model: SARIMAX(5, 1, 0)x(2, 0, [1], 7) Log Likelihood -26345.181
Date: Wed, 19 Feb 2025 AIC 52708.363
Time: 09:13:21 BIC 52765.183
Sample: 0 HQIC 52728.484
- 4079
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8132 0.016 -52.209 0.000 -0.844 -0.783
ar.L2 -0.6572 0.019 -34.076 0.000 -0.695 -0.619
ar.L3 -0.4929 0.021 -23.992 0.000 -0.533 -0.453
ar.L4 -0.3492 0.019 -18.202 0.000 -0.387 -0.312
ar.L5 -0.1664 0.015 -10.786 0.000 -0.197 -0.136
ar.S.L7 -0.8923 0.074 -12.024 0.000 -1.038 -0.747
ar.S.L14 -0.0535 0.016 -3.282 0.001 -0.085 -0.022
ma.S.L7 0.8640 0.073 11.867 0.000 0.721 1.007
sigma2 2.4e+04 501.913 47.819 0.000 2.3e+04 2.5e+04
===================================================================================
Ljung-Box (L1) (Q): 2.63 Jarque-Bera (JB): 62.03
Prob(Q): 0.10 Prob(JB): 0.00
Heteroskedasticity (H): 1.26 Skew: 0.27
Prob(H) (two-sided): 0.00 Kurtosis: 3.27
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
SARIMA Model for Flight 6:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4022
Model: SARIMAX Log Likelihood -26488.949
Date: Wed, 19 Feb 2025 AIC 52981.899
Time: 09:13:26 BIC 52994.498
Sample: 0 HQIC 52986.364
- 4022
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 379.8742 3.206 118.488 0.000 373.591 386.158
sigma2 3.077e+04 235.848 130.447 0.000 3.03e+04 3.12e+04
===================================================================================
Ljung-Box (L1) (Q): 0.03 Jarque-Bera (JB): 75986.64
Prob(Q): 0.86 Prob(JB): 0.00
Heteroskedasticity (H): 1.29 Skew: 2.41
Prob(H) (two-sided): 0.00 Kurtosis: 23.74
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
SARIMA Model for Flight 7:
SARIMAX Results
===========================================================================================
Dep. Variable: y No. Observations: 3952
Model: SARIMAX(0, 0, 3)x(0, 0, [1], 7) Log Likelihood -26432.753
Date: Wed, 19 Feb 2025 AIC 52877.506
Time: 09:14:13 BIC 52915.198
Sample: 0 HQIC 52890.875
- 3952
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 420.0979 4.317 97.302 0.000 411.636 428.560
ma.L1 0.0551 0.010 5.551 0.000 0.036 0.075
ma.L2 0.0307 0.016 1.942 0.052 -0.000 0.062
ma.L3 -0.0237 0.016 -1.451 0.147 -0.056 0.008
ma.S.L7 0.0361 0.011 3.389 0.001 0.015 0.057
sigma2 3.794e+04 346.821 109.402 0.000 3.73e+04 3.86e+04
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 75244.91
Prob(Q): 0.97 Prob(JB): 0.00
Heteroskedasticity (H): 0.60 Skew: 2.89
Prob(H) (two-sided): 0.00 Kurtosis: 23.58
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
SARIMA Model for Flight 8:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 3771
Model: SARIMAX(5, 1, 0) Log Likelihood -24579.693
Date: Wed, 19 Feb 2025 AIC 49171.386
Time: 09:14:27 BIC 49208.795
Sample: 0 HQIC 49184.687
- 3771
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8333 0.015 -54.742 0.000 -0.863 -0.803
ar.L2 -0.6891 0.018 -37.756 0.000 -0.725 -0.653
ar.L3 -0.5210 0.020 -25.935 0.000 -0.560 -0.482
ar.L4 -0.3419 0.019 -18.314 0.000 -0.378 -0.305
ar.L5 -0.1600 0.015 -10.860 0.000 -0.189 -0.131
sigma2 2.694e+04 290.738 92.666 0.000 2.64e+04 2.75e+04
===================================================================================
Ljung-Box (L1) (Q): 2.06 Jarque-Bera (JB): 10027.94
Prob(Q): 0.15 Prob(JB): 0.00
Heteroskedasticity (H): 1.41 Skew: 1.28
Prob(H) (two-sided): 0.00 Kurtosis: 10.57
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
SARIMA Model for Flight 9:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 3771
Model: SARIMAX(5, 1, 0) Log Likelihood -24579.693
Date: Wed, 19 Feb 2025 AIC 49171.386
Time: 09:14:41 BIC 49208.795
Sample: 0 HQIC 49184.687
- 3771
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8333 0.015 -54.742 0.000 -0.863 -0.803
ar.L2 -0.6891 0.018 -37.756 0.000 -0.725 -0.653
ar.L3 -0.5210 0.020 -25.935 0.000 -0.560 -0.482
ar.L4 -0.3419 0.019 -18.314 0.000 -0.378 -0.305
ar.L5 -0.1600 0.015 -10.860 0.000 -0.189 -0.131
sigma2 2.694e+04 290.738 92.666 0.000 2.64e+04 2.75e+04
===================================================================================
Ljung-Box (L1) (Q): 2.06 Jarque-Bera (JB): 10027.94
Prob(Q): 0.15 Prob(JB): 0.00
Heteroskedasticity (H): 1.41 Skew: 1.28
Prob(H) (two-sided): 0.00 Kurtosis: 10.57
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
SARIMA Model for Flight 10:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 3355
Model: SARIMAX(5, 1, 0) Log Likelihood -19287.469
Date: Wed, 19 Feb 2025 AIC 38586.938
Time: 09:14:59 BIC 38623.645
Sample: 0 HQIC 38600.067
- 3355
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8540 0.013 -67.624 0.000 -0.879 -0.829
ar.L2 -0.6797 0.016 -41.219 0.000 -0.712 -0.647
ar.L3 -0.5446 0.018 -29.734 0.000 -0.581 -0.509
ar.L4 -0.3621 0.017 -21.042 0.000 -0.396 -0.328
ar.L5 -0.1493 0.014 -10.671 0.000 -0.177 -0.122
sigma2 5790.9731 43.444 133.297 0.000 5705.824 5876.122
===================================================================================
Ljung-Box (L1) (Q): 1.42 Jarque-Bera (JB): 80549.12
Prob(Q): 0.23 Prob(JB): 0.00
Heteroskedasticity (H): 1.09 Skew: 2.70
Prob(H) (two-sided): 0.14 Kurtosis: 26.39
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [110]:
#Plot flight 5 SARIMA model
sarima_models[4].plot_diagnostics()
plt.show()
In [111]:
def plot_test(model, y_test, title):
y_pred = model.predict(n_periods= len(y_test))
plt.figure(figsize= (8,4))
plt.plot(y_pred)
plt.plot(y_test)
plt.xlabel('Time Index', fontweight= 'bold')
plt.ylabel('Temperature', fontweight= 'bold')
plt.title(title, fontweight= 'bold', fontsize= 16)
plt.show()
In [ ]:
# Fitting the ARIMA model with auto arima
model = auto_arima(flight_df['totalFare'], seasonal=True, m=7)
sarima_result = model.fit(flight_df['totalFare'])
arima_models.append(sarima_result)
# Summary
print(f"SARIMA Model for Flight {i+1}:")
print(sarima_result.summary())
print("\n")
SARIMA Model for Flight 10:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4330
Model: SARIMAX(5, 1, 0) Log Likelihood -27825.546
Date: Wed, 19 Feb 2025 AIC 55663.092
Time: 09:01:14 BIC 55701.330
Sample: 0 HQIC 55676.592
- 4330
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8576 0.015 -58.525 0.000 -0.886 -0.829
ar.L2 -0.6949 0.019 -37.171 0.000 -0.732 -0.658
ar.L3 -0.5108 0.019 -26.710 0.000 -0.548 -0.473
ar.L4 -0.3639 0.017 -20.831 0.000 -0.398 -0.330
ar.L5 -0.1544 0.015 -10.542 0.000 -0.183 -0.126
sigma2 2.242e+04 454.214 49.360 0.000 2.15e+04 2.33e+04
===================================================================================
Ljung-Box (L1) (Q): 1.77 Jarque-Bera (JB): 102.69
Prob(Q): 0.18 Prob(JB): 0.00
Heteroskedasticity (H): 1.11 Skew: 0.35
Prob(H) (two-sided): 0.04 Kurtosis: 3.28
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [105]:
from pmdarima import auto_arima
# Fitting the ARIMA model with auto arima
model = auto_arima(flight_df['totalFare'], seasonal=False, m=7)
arima_result = model.fit(flight_df['totalFare'])
arima_models.append(arima_result)
# Summary
print(f"ARIMA Model for Flight {i+1}:")
print(arima_result.summary())
print("\n")
/opt/anaconda3/lib/python3.12/site-packages/pmdarima/arima/_validation.py:62: UserWarning: m (7) set for non-seasonal fit. Setting to 0
warnings.warn("m (%i) set for non-seasonal fit. Setting to 0" % m)
ARIMA Model for Flight 10:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4330
Model: SARIMAX(5, 1, 0) Log Likelihood -27825.546
Date: Wed, 19 Feb 2025 AIC 55663.092
Time: 09:01:27 BIC 55701.330
Sample: 0 HQIC 55676.592
- 4330
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8576 0.015 -58.525 0.000 -0.886 -0.829
ar.L2 -0.6949 0.019 -37.171 0.000 -0.732 -0.658
ar.L3 -0.5108 0.019 -26.710 0.000 -0.548 -0.473
ar.L4 -0.3639 0.017 -20.831 0.000 -0.398 -0.330
ar.L5 -0.1544 0.015 -10.542 0.000 -0.183 -0.126
sigma2 2.242e+04 454.214 49.360 0.000 2.15e+04 2.33e+04
===================================================================================
Ljung-Box (L1) (Q): 1.77 Jarque-Bera (JB): 102.69
Prob(Q): 0.18 Prob(JB): 0.00
Heteroskedasticity (H): 1.11 Skew: 0.35
Prob(H) (two-sided): 0.04 Kurtosis: 3.28
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [112]:
# List to store ARIMA models
arima_models = []
# Iterate over each flight DataFrame
for i, flight_df in enumerate(route_dataframes):
# Ensure 'totalFare' exists and has sufficient data
if 'totalFare' not in flight_df.columns:
print(f"Skipping Flight {i+1}: 'totalFare' column not found.")
continue
if flight_df['totalFare'].isna().sum() > 0:
print(f"Skipping Flight {i+1}: Missing values in 'totalFare'.")
continue
if len(flight_df) < 10: # Ensure enough data points for ARIMA fitting
print(f"Skipping Flight {i+1}: Not enough data points.")
continue
# Fit the Auto-ARIMA model
model = auto_arima(flight_df['totalFare'], seasonal=False, m=7, suppress_warnings=True)
arima_result = model.fit(flight_df['totalFare'])
# Store the model
arima_models.append(arima_result)
# Print Summary
print(f"SARIMA Model for Flight {i+1}:")
print(arima_result.summary())
print("\n")
/opt/anaconda3/lib/python3.12/site-packages/pmdarima/arima/_validation.py:62: UserWarning: m (7) set for non-seasonal fit. Setting to 0
warnings.warn("m (%i) set for non-seasonal fit. Setting to 0" % m)
SARIMA Model for Flight 1:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4330
Model: SARIMAX(5, 1, 0) Log Likelihood -27825.546
Date: Wed, 19 Feb 2025 AIC 55663.092
Time: 09:20:38 BIC 55701.330
Sample: 0 HQIC 55676.592
- 4330
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8576 0.015 -58.525 0.000 -0.886 -0.829
ar.L2 -0.6949 0.019 -37.171 0.000 -0.732 -0.658
ar.L3 -0.5108 0.019 -26.710 0.000 -0.548 -0.473
ar.L4 -0.3639 0.017 -20.831 0.000 -0.398 -0.330
ar.L5 -0.1544 0.015 -10.542 0.000 -0.183 -0.126
sigma2 2.242e+04 454.214 49.360 0.000 2.15e+04 2.33e+04
===================================================================================
Ljung-Box (L1) (Q): 1.77 Jarque-Bera (JB): 102.69
Prob(Q): 0.18 Prob(JB): 0.00
Heteroskedasticity (H): 1.11 Skew: 0.35
Prob(H) (two-sided): 0.04 Kurtosis: 3.28
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/opt/anaconda3/lib/python3.12/site-packages/pmdarima/arima/_validation.py:62: UserWarning: m (7) set for non-seasonal fit. Setting to 0
warnings.warn("m (%i) set for non-seasonal fit. Setting to 0" % m)
SARIMA Model for Flight 2:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4330
Model: SARIMAX(5, 1, 0) Log Likelihood -27825.546
Date: Wed, 19 Feb 2025 AIC 55663.092
Time: 09:20:44 BIC 55701.330
Sample: 0 HQIC 55676.592
- 4330
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8576 0.015 -58.525 0.000 -0.886 -0.829
ar.L2 -0.6949 0.019 -37.171 0.000 -0.732 -0.658
ar.L3 -0.5108 0.019 -26.710 0.000 -0.548 -0.473
ar.L4 -0.3639 0.017 -20.831 0.000 -0.398 -0.330
ar.L5 -0.1544 0.015 -10.542 0.000 -0.183 -0.126
sigma2 2.242e+04 454.214 49.360 0.000 2.15e+04 2.33e+04
===================================================================================
Ljung-Box (L1) (Q): 1.77 Jarque-Bera (JB): 102.69
Prob(Q): 0.18 Prob(JB): 0.00
Heteroskedasticity (H): 1.11 Skew: 0.35
Prob(H) (two-sided): 0.04 Kurtosis: 3.28
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/opt/anaconda3/lib/python3.12/site-packages/pmdarima/arima/_validation.py:62: UserWarning: m (7) set for non-seasonal fit. Setting to 0
warnings.warn("m (%i) set for non-seasonal fit. Setting to 0" % m)
SARIMA Model for Flight 3:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4140
Model: SARIMAX(5, 0, 0) Log Likelihood -27078.613
Date: Wed, 19 Feb 2025 AIC 54171.226
Time: 09:20:48 BIC 54215.525
Sample: 0 HQIC 54186.901
- 4140
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 369.6903 13.242 27.918 0.000 343.736 395.645
ar.L1 0.0143 0.017 0.856 0.392 -0.018 0.047
ar.L2 0.0200 0.016 1.212 0.226 -0.012 0.052
ar.L3 0.0012 0.017 0.073 0.942 -0.031 0.034
ar.L4 0.0197 0.014 1.405 0.160 -0.008 0.047
ar.L5 0.0273 0.016 1.653 0.098 -0.005 0.060
sigma2 2.811e+04 183.333 153.341 0.000 2.78e+04 2.85e+04
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 182127.29
Prob(Q): 0.96 Prob(JB): 0.00
Heteroskedasticity (H): 1.23 Skew: 2.81
Prob(H) (two-sided): 0.00 Kurtosis: 35.00
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/opt/anaconda3/lib/python3.12/site-packages/pmdarima/arima/_validation.py:62: UserWarning: m (7) set for non-seasonal fit. Setting to 0
warnings.warn("m (%i) set for non-seasonal fit. Setting to 0" % m)
SARIMA Model for Flight 4:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4140
Model: SARIMAX(5, 0, 0) Log Likelihood -27078.613
Date: Wed, 19 Feb 2025 AIC 54171.226
Time: 09:20:52 BIC 54215.525
Sample: 0 HQIC 54186.901
- 4140
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 369.6903 13.242 27.918 0.000 343.736 395.645
ar.L1 0.0143 0.017 0.856 0.392 -0.018 0.047
ar.L2 0.0200 0.016 1.212 0.226 -0.012 0.052
ar.L3 0.0012 0.017 0.073 0.942 -0.031 0.034
ar.L4 0.0197 0.014 1.405 0.160 -0.008 0.047
ar.L5 0.0273 0.016 1.653 0.098 -0.005 0.060
sigma2 2.811e+04 183.333 153.341 0.000 2.78e+04 2.85e+04
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 182127.29
Prob(Q): 0.96 Prob(JB): 0.00
Heteroskedasticity (H): 1.23 Skew: 2.81
Prob(H) (two-sided): 0.00 Kurtosis: 35.00
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/opt/anaconda3/lib/python3.12/site-packages/pmdarima/arima/_validation.py:62: UserWarning: m (7) set for non-seasonal fit. Setting to 0
warnings.warn("m (%i) set for non-seasonal fit. Setting to 0" % m)
SARIMA Model for Flight 5:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4079
Model: SARIMAX(5, 1, 0) Log Likelihood -26353.055
Date: Wed, 19 Feb 2025 AIC 52718.110
Time: 09:20:57 BIC 52755.991
Sample: 0 HQIC 52731.525
- 4079
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8129 0.016 -52.442 0.000 -0.843 -0.783
ar.L2 -0.6558 0.019 -34.078 0.000 -0.694 -0.618
ar.L3 -0.4910 0.020 -23.968 0.000 -0.531 -0.451
ar.L4 -0.3464 0.019 -18.169 0.000 -0.384 -0.309
ar.L5 -0.1641 0.015 -10.710 0.000 -0.194 -0.134
sigma2 2.401e+04 499.479 48.072 0.000 2.3e+04 2.5e+04
===================================================================================
Ljung-Box (L1) (Q): 1.79 Jarque-Bera (JB): 62.42
Prob(Q): 0.18 Prob(JB): 0.00
Heteroskedasticity (H): 1.25 Skew: 0.27
Prob(H) (two-sided): 0.00 Kurtosis: 3.28
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/opt/anaconda3/lib/python3.12/site-packages/pmdarima/arima/_validation.py:62: UserWarning: m (7) set for non-seasonal fit. Setting to 0
warnings.warn("m (%i) set for non-seasonal fit. Setting to 0" % m)
SARIMA Model for Flight 6:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 4022
Model: SARIMAX Log Likelihood -26488.949
Date: Wed, 19 Feb 2025 AIC 52981.899
Time: 09:21:01 BIC 52994.498
Sample: 0 HQIC 52986.364
- 4022
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 379.8742 3.206 118.488 0.000 373.591 386.158
sigma2 3.077e+04 235.848 130.447 0.000 3.03e+04 3.12e+04
===================================================================================
Ljung-Box (L1) (Q): 0.03 Jarque-Bera (JB): 75986.64
Prob(Q): 0.86 Prob(JB): 0.00
Heteroskedasticity (H): 1.29 Skew: 2.41
Prob(H) (two-sided): 0.00 Kurtosis: 23.74
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/opt/anaconda3/lib/python3.12/site-packages/pmdarima/arima/_validation.py:62: UserWarning: m (7) set for non-seasonal fit. Setting to 0
warnings.warn("m (%i) set for non-seasonal fit. Setting to 0" % m)
SARIMA Model for Flight 7:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 3952
Model: SARIMAX(5, 0, 0) Log Likelihood -26431.788
Date: Wed, 19 Feb 2025 AIC 52877.576
Time: 09:21:05 BIC 52921.550
Sample: 0 HQIC 52893.173
- 3952
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 368.6475 14.658 25.150 0.000 339.918 397.377
ar.L1 0.0564 0.010 5.835 0.000 0.037 0.075
ar.L2 0.0299 0.016 1.909 0.056 -0.001 0.061
ar.L3 -0.0238 0.016 -1.446 0.148 -0.056 0.008
ar.L4 0.0264 0.018 1.481 0.139 -0.009 0.061
ar.L5 0.0328 0.017 1.969 0.049 0.000 0.065
sigma2 3.779e+04 345.992 109.208 0.000 3.71e+04 3.85e+04
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 75469.16
Prob(Q): 0.95 Prob(JB): 0.00
Heteroskedasticity (H): 0.59 Skew: 2.89
Prob(H) (two-sided): 0.00 Kurtosis: 23.61
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/opt/anaconda3/lib/python3.12/site-packages/pmdarima/arima/_validation.py:62: UserWarning: m (7) set for non-seasonal fit. Setting to 0
warnings.warn("m (%i) set for non-seasonal fit. Setting to 0" % m)
SARIMA Model for Flight 8:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 3771
Model: SARIMAX(5, 1, 0) Log Likelihood -24579.693
Date: Wed, 19 Feb 2025 AIC 49171.386
Time: 09:21:11 BIC 49208.795
Sample: 0 HQIC 49184.687
- 3771
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8333 0.015 -54.742 0.000 -0.863 -0.803
ar.L2 -0.6891 0.018 -37.756 0.000 -0.725 -0.653
ar.L3 -0.5210 0.020 -25.935 0.000 -0.560 -0.482
ar.L4 -0.3419 0.019 -18.314 0.000 -0.378 -0.305
ar.L5 -0.1600 0.015 -10.860 0.000 -0.189 -0.131
sigma2 2.694e+04 290.738 92.666 0.000 2.64e+04 2.75e+04
===================================================================================
Ljung-Box (L1) (Q): 2.06 Jarque-Bera (JB): 10027.94
Prob(Q): 0.15 Prob(JB): 0.00
Heteroskedasticity (H): 1.41 Skew: 1.28
Prob(H) (two-sided): 0.00 Kurtosis: 10.57
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/opt/anaconda3/lib/python3.12/site-packages/pmdarima/arima/_validation.py:62: UserWarning: m (7) set for non-seasonal fit. Setting to 0
warnings.warn("m (%i) set for non-seasonal fit. Setting to 0" % m)
SARIMA Model for Flight 9:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 3771
Model: SARIMAX(5, 1, 0) Log Likelihood -24579.693
Date: Wed, 19 Feb 2025 AIC 49171.386
Time: 09:21:17 BIC 49208.795
Sample: 0 HQIC 49184.687
- 3771
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8333 0.015 -54.742 0.000 -0.863 -0.803
ar.L2 -0.6891 0.018 -37.756 0.000 -0.725 -0.653
ar.L3 -0.5210 0.020 -25.935 0.000 -0.560 -0.482
ar.L4 -0.3419 0.019 -18.314 0.000 -0.378 -0.305
ar.L5 -0.1600 0.015 -10.860 0.000 -0.189 -0.131
sigma2 2.694e+04 290.738 92.666 0.000 2.64e+04 2.75e+04
===================================================================================
Ljung-Box (L1) (Q): 2.06 Jarque-Bera (JB): 10027.94
Prob(Q): 0.15 Prob(JB): 0.00
Heteroskedasticity (H): 1.41 Skew: 1.28
Prob(H) (two-sided): 0.00 Kurtosis: 10.57
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/opt/anaconda3/lib/python3.12/site-packages/pmdarima/arima/_validation.py:62: UserWarning: m (7) set for non-seasonal fit. Setting to 0
warnings.warn("m (%i) set for non-seasonal fit. Setting to 0" % m)
SARIMA Model for Flight 10:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 3355
Model: SARIMAX(5, 1, 0) Log Likelihood -19287.469
Date: Wed, 19 Feb 2025 AIC 38586.938
Time: 09:21:22 BIC 38623.645
Sample: 0 HQIC 38600.067
- 3355
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.8540 0.013 -67.624 0.000 -0.879 -0.829
ar.L2 -0.6797 0.016 -41.219 0.000 -0.712 -0.647
ar.L3 -0.5446 0.018 -29.734 0.000 -0.581 -0.509
ar.L4 -0.3621 0.017 -21.042 0.000 -0.396 -0.328
ar.L5 -0.1493 0.014 -10.671 0.000 -0.177 -0.122
sigma2 5790.9731 43.444 133.297 0.000 5705.824 5876.122
===================================================================================
Ljung-Box (L1) (Q): 1.42 Jarque-Bera (JB): 80549.12
Prob(Q): 0.23 Prob(JB): 0.00
Heteroskedasticity (H): 1.09 Skew: 2.70
Prob(H) (two-sided): 0.14 Kurtosis: 26.39
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [129]:
# Plot residuals for each ARIMA model
for i, model in enumerate(arima_models):
residuals = model.resid()
plt.figure(figsize=(10, 6))
plt.plot(residuals, label=f'Flight {i+1}')
plt.title(f'Residuals for Flight {i+1}')
plt.xlabel('Index')
plt.ylabel('Residuals')
plt.legend()
plt.grid()
plt.show()
In [113]:
#Prophet model prediction
from prophet import Prophet
In [118]:
print (flight_df.head())
ds y 0 1 days 369.10 1 1 days 370.60 2 1 days 462.58 3 1 days 598.60 4 1 days 390.61
In [122]:
# List to store Prophet models
prophet_models = []
# Iterate over each flight DataFrame
for i, flight_df in enumerate(route_dataframes):
# Ensure 'totalFare' exists and has sufficient data
if 'totalFare' not in flight_df.columns:
print(f"Skipping Flight {i+1}: 'totalFare' column not found.")
continue
if flight_df['totalFare'].isna().sum() > 0:
print(f"Skipping Flight {i+1}: Missing values in 'totalFare'.")
continue
if len(flight_df) < 10: # Ensure enough data points for Prophet fitting
print(f"Skipping Flight {i+1}: Not enough data points.")
continue
# Prepare the DataFrame for Prophet
flight_df = flight_df.reset_index()[['day_to_dep', 'totalFare']]
flight_df.columns = ['ds', 'y']
start_date = pd.Timestamp("2022-06-01")
flight_df['ds'] = start_date + flight_df['ds']
# Fit the Prophet model
model = Prophet()
model.fit(flight_df)
# Store the model
prophet_models.append(model)
# Print Summary
print(f"Prophet Model for Flight {i+1} fitted.")
09:32:17 - cmdstanpy - INFO - Chain [1] start processing 09:32:17 - cmdstanpy - INFO - Chain [1] done processing 09:32:18 - cmdstanpy - INFO - Chain [1] start processing 09:32:18 - cmdstanpy - INFO - Chain [1] done processing 09:32:18 - cmdstanpy - INFO - Chain [1] start processing
Prophet Model for Flight 1 fitted. Prophet Model for Flight 2 fitted.
09:32:18 - cmdstanpy - INFO - Chain [1] done processing 09:32:18 - cmdstanpy - INFO - Chain [1] start processing 09:32:18 - cmdstanpy - INFO - Chain [1] done processing 09:32:18 - cmdstanpy - INFO - Chain [1] start processing
Prophet Model for Flight 3 fitted. Prophet Model for Flight 4 fitted.
09:32:18 - cmdstanpy - INFO - Chain [1] done processing 09:32:18 - cmdstanpy - INFO - Chain [1] start processing 09:32:18 - cmdstanpy - INFO - Chain [1] done processing
Prophet Model for Flight 5 fitted.
09:32:18 - cmdstanpy - INFO - Chain [1] start processing 09:32:18 - cmdstanpy - INFO - Chain [1] done processing 09:32:18 - cmdstanpy - INFO - Chain [1] start processing
Prophet Model for Flight 6 fitted. Prophet Model for Flight 7 fitted.
09:32:19 - cmdstanpy - INFO - Chain [1] done processing 09:32:19 - cmdstanpy - INFO - Chain [1] start processing 09:32:19 - cmdstanpy - INFO - Chain [1] done processing 09:32:19 - cmdstanpy - INFO - Chain [1] start processing
Prophet Model for Flight 8 fitted. Prophet Model for Flight 9 fitted.
09:32:19 - cmdstanpy - INFO - Chain [1] done processing
Prophet Model for Flight 10 fitted.
In [123]:
#Forcast the price for every flight
for i, model in enumerate(prophet_models):
future = model.make_future_dataframe(periods=30)
forecast = model.predict(future)
model.plot(forecast)
plt.title(f"Flight {i+1} Price Forecast")
plt.xlabel('Date')
plt.ylabel('Price')
plt.grid()
plt.show()
# List to store the 10 DataFrames
route_dataframes = []
for route in top_flights[['startingAirport', 'destinationAirport']].values:
start_airport, dest_airport = route
route_df = df[(df['startingAirport'] == start_airport) & (df['destinationAirport'] == dest_airport)][['day_to_dep', 'totalFare']]
# Sorting by days to departure
route_df = route_df.sort_values(by='day_to_dep')
route_dataframes.append(route_df)
In [127]:
# Residuals for each flight
for i, model in enumerate(prophet_models):
future = model.make_future_dataframe(periods=30)
forecast = model.predict(future)
# Align indices before calculating residuals
aligned_df = route_dataframes[i].set_index('day_to_dep').join(forecast.set_index('ds'), how='inner')
residuals = aligned_df['yhat'] - aligned_df['totalFare']
plt.figure(figsize=(10, 6))
plt.plot(residuals, label=f'Flight {i+1}')
plt.title(f'Residuals for Flight {i+1}')
plt.xlabel('Index')
plt.ylabel('Residuals')
plt.legend()
plt.grid()
plt.show()